Index: src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
===================================================================
--- src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (revision 670973)
+++ src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (working copy)
@@ -47,7 +47,7 @@
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true);
- for (int i = 0; i < 5; i++) {
+ for (int i = 0; i < 20; i++) {
Document document = new Document();
assembleDocument(document, i);
indexWriter.addDocument(document);
@@ -59,9 +59,10 @@
InstantiatedIndex ii = new InstantiatedIndex(ir);
ir.close();
- testEquals(dir, ii);
+ testEqualBehaviour(dir, ii);
}
+
public void testInstantiatedIndexWriter() throws Exception {
@@ -86,7 +87,7 @@
}
instantiatedIndexWriter.close();
- testEquals(dir, ii);
+ testEqualBehaviour(dir, ii);
testTermDocs(dir, ii);
@@ -186,6 +187,25 @@
* @param testIndex the index that is supposed to equals the apriori index.
* @throws Exception
*/
+  protected void testEqualBehaviour(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
+    // the two indices must be equal both before and after the same
+    // documents have been deleted from each of them
+
+    testEquals(aprioriIndex, testIndex);
+
+    // delete a few documents
+    deleteSampleDocuments(IndexReader.open(aprioriIndex));
+    deleteSampleDocuments(testIndex.indexReaderFactory());
+
+    // make sure they still equal
+    testEquals(aprioriIndex, testIndex);
+  }
+
+  /**
+   * Deletes documents 3 and 8 through the given reader and closes the reader,
+   * even if a deletion fails, so that no reader is left open on the index.
+   *
+   * @param reader an open reader; it is closed by this method
+   * @throws Exception if a deletion or the close fails
+   */
+  private void deleteSampleDocuments(IndexReader reader) throws Exception {
+    try {
+      reader.deleteDocument(3);
+      reader.deleteDocument(8);
+    } finally {
+      reader.close();
+    }
+  }
+
protected void testEquals(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
IndexReader aprioriReader = IndexReader.open(aprioriIndex);
@@ -193,6 +213,17 @@
assertEquals(aprioriReader.numDocs(), testReader.numDocs());
+ // assert field options
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED), testReader.getFieldNames(IndexReader.FieldOption.INDEXED));
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR));
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR));
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), testReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS));
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR));
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET));
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION));
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET));
+ assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.UNINDEXED), testReader.getFieldNames(IndexReader.FieldOption.UNINDEXED));
+
for (Object field : aprioriReader.getFieldNames(IndexReader.FieldOption.ALL)) {
// test norms as used by normal use
Index: src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
===================================================================
--- src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (revision 670973)
+++ src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (working copy)
@@ -16,22 +16,37 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.store.Directory;
-import java.io.IOException;
-import java.util.*;
-
/**
- * An InstantiatedIndexReader is not a snapshot in time,
- * it is completely in sync with the latest commit to the store!
- *
+ * An InstantiatedIndexReader is not a snapshot in time, it is completely in
+ * sync with the latest commit to the store!
+ *
* Consider using InstantiatedIndex as if it was immutable.
*/
-public class InstantiatedIndexReader
- extends IndexReader {
+public class InstantiatedIndexReader extends IndexReader {
private final InstantiatedIndex index;
@@ -47,40 +62,40 @@
return true;
}
-
/**
- * An InstantiatedIndexReader is not a snapshot in time,
- * it is completely in sync with the latest commit to the store!
- *
- * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index.
+ * An InstantiatedIndexReader is not a snapshot in time, it is completely in
+ * sync with the latest commit to the store!
+ *
+ * @return output from {@link InstantiatedIndex#getVersion()} in associated
+ * instantiated index.
*/
public long getVersion() {
return index.getVersion();
}
-
public Directory directory() {
throw new UnsupportedOperationException();
}
-
/**
* An InstantiatedIndexReader is always current!
- *
- * Check whether this IndexReader is still using the
- * current (i.e., most recently committed) version of the
- * index. If a writer has committed any changes to the
- * index since this reader was opened, this will return
- * false, in which case you must open a new
- * IndexReader in order to see the changes. See the
- * description of the autoCommit
- * flag which controls when the {@link IndexWriter}
- * actually commits changes to the index.
- *
+ *
+ * Check whether this IndexReader is still using the current (i.e., most
+ * recently committed) version of the index. If a writer has committed any
+ * changes to the index since this reader was opened, this will return
+ * false, in which case you must open a new IndexReader in
+ * order to see the changes. See the description of the autoCommit flag
+ * which controls when the {@link IndexWriter} actually commits changes to the
+ * index.
+ *
* @return always true
- * @throws CorruptIndexException if the index is corrupt
- * @throws IOException if there is a low-level IO error
- * @throws UnsupportedOperationException unless overridden in subclass
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ * @throws UnsupportedOperationException
+ * unless overridden in subclass
*/
public boolean isCurrent() throws IOException {
return true;
@@ -92,7 +107,7 @@
private Set deletedDocuments = new HashSet();
private Set deletedDocumentNumbers = new HashSet();
- private Map> updatedNormsByFieldNameAndDocumentNumber = null;
+ private Map> updatedNormsByFieldNameAndDocumentNumber = null;
private class NormUpdate {
private int doc;
@@ -140,7 +155,7 @@
// 1. update norms
if (updatedNormsByFieldNameAndDocumentNumber != null) {
- for (Map.Entry> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) {
+ for (Map.Entry> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) {
byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
for (NormUpdate normUpdate : e.getValue()) {
norms[normUpdate.doc] = normUpdate.value;
@@ -170,25 +185,56 @@
// ignored
}
- public Collection getFieldNames(FieldOption fldOption) {
- if (fldOption != FieldOption.ALL) {
- throw new IllegalArgumentException("Only FieldOption.ALL implemented."); // todo
+  /**
+   * Collects the names of all fields of the index whose settings match the
+   * given option.
+   *
+   * @param fieldOption the option every returned field has to match
+   * @return a new set holding the names of the matching fields
+   */
+  public Collection getFieldNames(FieldOption fieldOption) {
+    Set matchingNames = new HashSet();
+    for (FieldSetting setting : index.fieldSettings.values()) {
+      if (matchesFieldOption(setting, fieldOption)) {
+        matchingNames.add(setting.fieldName);
+      }
}
- return new ArrayList(getIndex().getTermsByFieldAndText().keySet());
+    return matchingNames;
}
+
+  /**
+   * Tells whether a field with the given settings is selected by the option.
+   * An option that is none of the constants checked here selects nothing.
+   */
+  private static boolean matchesFieldOption(FieldSetting setting, FieldOption fieldOption) {
+    if (fieldOption == FieldOption.ALL) {
+      return true;
+    }
+    if (fieldOption == FieldOption.UNINDEXED) {
+      return !setting.indexed;
+    }
+    if (fieldOption == FieldOption.STORES_PAYLOADS) {
+      return setting.storePayloads;
+    }
+    if (fieldOption == FieldOption.INDEXED) {
+      return setting.indexed;
+    }
+    if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR) {
+      return setting.indexed && !setting.storeTermVector;
+    }
+    if (fieldOption == FieldOption.TERMVECTOR) {
+      // plain term vector only: neither positions nor offsets stored
+      return setting.storeTermVector && !setting.storePositionWithTermVector && !setting.storeOffsetWithTermVector;
+    }
+    if (fieldOption == FieldOption.INDEXED_WITH_TERMVECTOR) {
+      return setting.indexed && setting.storeTermVector;
+    }
+    if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION) {
+      return setting.storePositionWithTermVector && !setting.storeOffsetWithTermVector;
+    }
+    if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET) {
+      return setting.storeOffsetWithTermVector && !setting.storePositionWithTermVector;
+    }
+    if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
+      return setting.storeOffsetWithTermVector && setting.storePositionWithTermVector;
+    }
+    return false;
+  }
-
/**
- * This implementation ignores the field selector! All fields are always returned
- *
- * Get the {@link org.apache.lucene.document.Document} at the nth position.
- *
- * @param n Get the document at the nth position
- * @param fieldSelector ignored
- * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
- * @throws CorruptIndexException if the index is corrupt
- * @throws IOException if there is a low-level IO error
- *
+ * This implementation ignores the field selector! All fields are always
+ * returned
+ *
+ * Get the {@link org.apache.lucene.document.Document} at the nth
+ * position.
+ *
+ * @param n
+ * Get the document at the nth position
+ * @param fieldSelector
+ * ignored
+ * @return The stored fields of the
+ * {@link org.apache.lucene.document.Document} at the nth position
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ *
* @see org.apache.lucene.document.Fieldable
* @see org.apache.lucene.document.FieldSelector
* @see org.apache.lucene.document.SetBasedFieldSelector
@@ -199,18 +245,17 @@
}
public Document document(int n) throws IOException {
- if ((deletedDocumentNumbers != null
- && deletedDocumentNumbers.contains(n))
- ||
- (getIndex().getDeletedDocuments() != null
- && getIndex().getDeletedDocuments().contains(n))) {
- return null;
- }
+    // Deleted documents are reported as null; only documents that are
+    // not deleted are looked up by number.
+    if (isDeleted(n)) {
+      return null;
+    }
return getIndex().getDocumentsByNumber()[n].getDocument();
}
/**
- * never ever touch these values. it is the true values, unless norms have been touched.
+   * Never ever modify these values. They are the true values, unless norms
+   * have been touched.
*/
public byte[] norms(String field) throws IOException {
byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
@@ -233,7 +278,8 @@
protected void doSetNorm(int doc, String field, byte value) throws IOException {
if (updatedNormsByFieldNameAndDocumentNumber == null) {
- updatedNormsByFieldNameAndDocumentNumber = new HashMap>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
+ updatedNormsByFieldNameAndDocumentNumber = new HashMap>(getIndex().getNormsByFieldNameAndDocumentNumber()
+ .size());
}
List list = updatedNormsByFieldNameAndDocumentNumber.get(field);
if (list == null) {
@@ -252,7 +298,6 @@
}
}
-
public TermEnum terms() throws IOException {
return new InstantiatedTermEnum(this);
}
@@ -260,11 +305,11 @@
public TermEnum terms(Term t) throws IOException {
InstantiatedTerm it = getIndex().findTerm(t);
if (it != null) {
- return new InstantiatedTermEnum(this, it.getTermIndex());
+ return new InstantiatedTermEnum(this, it.getTermIndex());
} else {
int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
if (startPos < 0) {
- startPos = -1 -startPos;
+ startPos = -1 - startPos;
}
return new InstantiatedTermEnum(this, startPos);
}
@@ -293,19 +338,16 @@
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
- if (doc.getVectorSpace() == null
- || doc.getVectorSpace().get(field) == null) {
+ if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) {
return null;
} else {
return new InstantiatedTermPositionVector(doc, field);
}
}
-
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
- if (doc.getVectorSpace() != null
- && doc.getVectorSpace().get(field) == null) {
+    // map only when the field has a term vector: tv is dereferenced below
+    if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) != null) {
List tv = doc.getVectorSpace().get(field);
mapper.setExpectations(field, tv.size(), true, true);
for (InstantiatedTermDocumentInformation tdi : tv) {
@@ -316,7 +358,7 @@
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
- for (Map.Entry> e : doc.getVectorSpace().entrySet()) {
+ for (Map.Entry> e : doc.getVectorSpace().entrySet()) {
mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
Index: src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java
===================================================================
--- src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java (revision 670973)
+++ src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java (working copy)
@@ -61,7 +61,7 @@
* Returns the current Term in the enumeration.
*/
public Term term() {
- return /*term == null ? null :*/ term.getTerm();
+    // without a current term there is nothing to unwrap
+    if (term == null) {
+      return null;
+    }
+    return term.getTerm();
}
/**
Index: src/java/org/apache/lucene/store/instantiated/FieldSettings.java
===================================================================
--- src/java/org/apache/lucene/store/instantiated/FieldSettings.java (revision 0)
+++ src/java/org/apache/lucene/store/instantiated/FieldSettings.java (revision 0)
@@ -0,0 +1,95 @@
+package org.apache.lucene.store.instantiated;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Collection;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Manages FieldSetting instances, keyed by field name.
+ */
+class FieldSettings {
+
+  /** Merged settings, keyed by field name. */
+  private final Map<String, FieldSetting> fieldSettings = new HashMap<String, FieldSetting>();
+
+  FieldSettings() {
+  }
+
+  /**
+   * Folds the flags of the given setting into the setting registered under
+   * the same field name, registering a new setting first if there is none.
+   * Flags are only ever switched on; a merge never clears a flag.
+   *
+   * @param fieldSetting the setting whose flags are merged in
+   * @return the registered setting after the merge
+   */
+  synchronized FieldSetting merge(FieldSetting fieldSetting) {
+    FieldSetting setting = get(fieldSetting.fieldName, true);
+
+    // stored field flags
+    setting.stored |= fieldSetting.stored;
+    setting.compressed |= fieldSetting.compressed;
+
+    // index flags
+    setting.indexed |= fieldSetting.indexed;
+    setting.tokenized |= fieldSetting.tokenized;
+
+    // term vector flags
+    setting.storeTermVector |= fieldSetting.storeTermVector;
+    setting.storeOffsetWithTermVector |= fieldSetting.storeOffsetWithTermVector;
+    setting.storePositionWithTermVector |= fieldSetting.storePositionWithTermVector;
+
+    setting.storePayloads |= fieldSetting.storePayloads;
+
+    return setting;
+  }
+
+  /**
+   * Looks up a setting without creating it.
+   * @param name field name
+   * @return the setting registered for the field, or null if there is none
+   */
+  FieldSetting get(String name) {
+    return fieldSettings.get(name);
+  }
+
+  /**
+   * NOTE(review): unlike merge this method is not synchronized although it
+   * may add to the map -- confirm whether concurrent use is intended.
+   *
+   * @param name field name
+   * @param create if true, a missing setting is created and registered
+   * @return the setting for the field, or null if missing and create is false
+   */
+  FieldSetting get(String name, boolean create) {
+    FieldSetting fieldSetting = fieldSettings.get(name);
+    if (create && fieldSetting == null) {
+      fieldSetting = new FieldSetting(name);
+      fieldSettings.put(name, fieldSetting);
+    }
+    return fieldSetting;
+  }
+
+  /** @return a view of all registered settings, backed by the internal map */
+  Collection<FieldSetting> values() {
+    return fieldSettings.values();
+  }
+
+}
Index: src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
===================================================================
--- src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (revision 670973)
+++ src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (working copy)
@@ -16,6 +16,22 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
@@ -28,11 +44,6 @@
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.io.StringReader;
-import java.util.*;
-
/**
* This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism.
*
@@ -161,6 +172,11 @@
boolean orderedTermsDirty = false;
Set dirtyTerms = new HashSet(1000);
+
+ Map fieldSettingsByFieldName = new HashMap();
+ for (String fieldName : fieldNameBuffer) {
+ fieldSettingsByFieldName.put(fieldName, new FieldSetting(fieldName));
+ }
InstantiatedDocument[] documentsByNumber = new InstantiatedDocument[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
System.arraycopy(index.getDocumentsByNumber(), 0, documentsByNumber, 0, index.getDocumentsByNumber().length);
@@ -215,7 +231,7 @@
}
termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();
- if (eFieldTermDocInfoFactoriesByTermText.getKey().isIndexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
+ if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost;
norm *= document.getDocument().getBoost();
norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
@@ -340,6 +356,7 @@
}
}
+ fieldSettingsByFieldName.putAll(documentFieldSettingsByFieldName);
}
// order document informations in dirty terms
@@ -358,6 +375,9 @@
index.setDocumentsByNumber(documentsByNumber);
index.setOrderedTerms(orderedTerms.toArray(new InstantiatedTerm[orderedTerms.size()]));
+ for (FieldSetting fieldSetting : fieldSettingsByFieldName.values()) {
+ index.fieldSettings.merge(fieldSetting);
+ }
// set term index
if (orderedTermsDirty) {
// todo optimize, only update from start position
@@ -434,45 +454,46 @@
Map fieldSettingsByFieldName = new HashMap();
for (Field field : (List) document.getDocument().getFields()) {
- FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name());
- if (fieldSettings == null) {
- fieldSettings = new FieldSetting();
- fieldSettings.fieldName = field.name().intern();
- fieldSettingsByFieldName.put(fieldSettings.fieldName, fieldSettings);
- fieldNameBuffer.add(fieldSettings.fieldName);
+ FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
+ if (fieldSetting == null) {
+ fieldSetting = new FieldSetting();
+ fieldSetting.fieldName = field.name().intern();
+ fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting);
+ fieldNameBuffer.add(fieldSetting.fieldName);
}
// todo: fixme: multiple fields with the same name does not mean field boost += more boost.
- fieldSettings.boost *= field.getBoost();
+ fieldSetting.boost *= field.getBoost();
//fieldSettings.dimensions++;
+
// once fieldSettings, always fieldSettings.
- if (field.getOmitNorms() != fieldSettings.omitNorms) {
- fieldSettings.omitNorms = true;
+ if (field.getOmitNorms()) {
+ fieldSetting.omitNorms = true;
}
- if (field.isIndexed() != fieldSettings.isIndexed) {
- fieldSettings.isIndexed = true;
+ if (field.isIndexed() ) {
+ fieldSetting.indexed = true;
}
- if (field.isTokenized() != fieldSettings.isTokenized) {
- fieldSettings.isTokenized = true;
+ if (field.isTokenized()) {
+ fieldSetting.tokenized = true;
}
- if (field.isCompressed() != fieldSettings.isCompressed) {
- fieldSettings.isCompressed = true;
+ if (field.isCompressed()) {
+ fieldSetting.compressed = true;
}
- if (field.isStored() != fieldSettings.isStored) {
- fieldSettings.isStored = true;
+ if (field.isStored()) {
+ fieldSetting.stored = true;
}
- if (field.isBinary() != fieldSettings.isBinary) {
- fieldSettings.isBinary = true;
+ if (field.isBinary()) {
+ fieldSetting.isBinary = true;
}
- if (field.isTermVectorStored() != fieldSettings.storeTermVector) {
- fieldSettings.storeTermVector = true;
+ if (field.isTermVectorStored()) {
+ fieldSetting.storeTermVector = true;
}
- if (field.isStorePositionWithTermVector() != fieldSettings.storePositionWithTermVector) {
- fieldSettings.storePositionWithTermVector = true;
+ if (field.isStorePositionWithTermVector()) {
+ fieldSetting.storePositionWithTermVector = true;
}
- if (field.isStoreOffsetWithTermVector() != fieldSettings.storeOffsetWithTermVector) {
- fieldSettings.storeOffsetWithTermVector = true;
+ if (field.isStoreOffsetWithTermVector()) {
+ fieldSetting.storeOffsetWithTermVector = true;
}
}
@@ -483,7 +504,7 @@
Field field = it.next();
- FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name());
+ FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
if (field.isIndexed()) {
@@ -505,15 +526,15 @@
next.setTermText(next.termText().intern()); // todo: not sure this needs to be interned?
tokens.add(next); // the vector will be built on commit.
next = tokenStream.next();
- fieldSettings.fieldLength++;
- if (fieldSettings.fieldLength > maxFieldLength) {
+ fieldSetting.fieldLength++;
+ if (fieldSetting.fieldLength > maxFieldLength) {
break;
}
}
} else {
// untokenized
tokens.add(new Token(field.stringValue().intern(), 0, field.stringValue().length(), "untokenized"));
- fieldSettings.fieldLength++;
+ fieldSetting.fieldLength++;
}
}
@@ -528,7 +549,7 @@
// build term vector, term positions and term offsets
for (Map.Entry> eField_Tokens : tokensByField.entrySet()) {
- FieldSetting fieldSettings = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
+ FieldSetting fieldSetting = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
Map termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()));
if (termDocumentInformationFactoryByTermText == null) {
@@ -539,9 +560,9 @@
int lastOffset = 0;
// for each new field, move positions a bunch.
- if (fieldSettings.position > 0) {
+ if (fieldSetting.position > 0) {
// todo what if no analyzer set, multiple fields with same name and index without tokenization?
- fieldSettings.position += analyzer.getPositionIncrementGap(fieldSettings.fieldName);
+ fieldSetting.position += analyzer.getPositionIncrementGap(fieldSetting.fieldName);
}
for (Token token : eField_Tokens.getValue()) {
@@ -553,26 +574,27 @@
}
//termDocumentInformationFactory.termFrequency++;
- fieldSettings.position += (token.getPositionIncrement() - 1);
- termDocumentInformationFactory.termPositions.add(fieldSettings.position++);
+ fieldSetting.position += (token.getPositionIncrement() - 1);
+ termDocumentInformationFactory.termPositions.add(fieldSetting.position++);
if (token.getPayload() != null && token.getPayload().length() > 0) {
termDocumentInformationFactory.payloads.add(token.getPayload().toByteArray());
+ fieldSetting.storePayloads = true;
} else {
termDocumentInformationFactory.payloads.add(null);
}
if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
- termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSettings.offset + token.startOffset(), fieldSettings.offset + token.endOffset()));
- lastOffset = fieldSettings.offset + token.endOffset();
+ termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSetting.offset + token.startOffset(), fieldSetting.offset + token.endOffset()));
+ lastOffset = fieldSetting.offset + token.endOffset();
}
}
if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
- fieldSettings.offset = lastOffset + 1;
+ fieldSetting.offset = lastOffset + 1;
}
}
@@ -631,51 +653,30 @@
return analyzer;
}
+ private class TermDocumentInformationFactory {
+ private LinkedList payloads = new LinkedList();
+ private LinkedList termPositions = new LinkedList();
+ private LinkedList termOffsets = new LinkedList();
+ }
- private class FieldSetting {
- private String fieldName;
- private float boost = 1;
- //private int dimensions = 0; // this is futuristic
- private int position = 0;
- private int offset;
- private int fieldLength = 0;
+  /**
+   * Extends the package level FieldSetting with the additional state this
+   * writer tracks per field name.
+   */
+ static class FieldSetting extends org.apache.lucene.store.instantiated.FieldSetting {
- private boolean storeTermVector = false;
- private boolean storeOffsetWithTermVector = false;
- private boolean storePositionWithTermVector = false;
- private boolean omitNorms = false;
- private boolean isTokenized = false;
+    /** multiplied by the boost of every field with this name */
+ float boost = 1;
+    /** running term position, advanced per token and by the analyzer's position increment gap */
+ int position = 0;
+    /** base added to token start and end offsets when term vector offsets are stored */
+ int offset;
+    /** token count of the field; checked against maxFieldLength and passed to lengthNorm */
+ int fieldLength = 0;
- private boolean isStored = false;
- private boolean isIndexed = false;
- private boolean isBinary = false;
- private boolean isCompressed = false;
+    /** switched on once a field with this name omits norms */
+ boolean omitNorms = false;
+    /** switched on once a field with this name is binary */
+ boolean isBinary = false;
- //private float norm;
- //private byte encodedNorm;
-
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
-
- final FieldSetting that = (FieldSetting) o;
-
- return fieldName.equals(that.fieldName);
-
+    /** creates a setting whose fieldName is assigned by the caller afterwards */
+ private FieldSetting() {
}
- public int hashCode() {
- return fieldName.hashCode();
+    /** creates a setting for the given field name with all flags at their defaults */
+ private FieldSetting(String fieldName) {
+ super(fieldName);
}
}
- private class TermDocumentInformationFactory {
- private LinkedList payloads = new LinkedList();
- private LinkedList termPositions = new LinkedList();
- private LinkedList termOffsets = new LinkedList();
- }
-
-
}
Index: src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
===================================================================
--- src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (revision 670973)
+++ src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (working copy)
@@ -16,15 +16,25 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.index.TermPositions;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.*;
-
/**
* Represented as a coupled graph of class instances, this
* all-in-memory index store implementation delivers search
@@ -57,6 +67,7 @@
private Map normsByFieldNameAndDocumentNumber;
+ FieldSettings fieldSettings;
/**
* Creates an empty instantiated index for you to fill with data using an {@link org.apache.lucene.store.instantiated.InstantiatedIndexWriter}.
@@ -68,12 +79,14 @@
void initialize() {
// todo: clear index without loosing memory (uncouple stuff)
termsByFieldAndText = new HashMap>();
+ fieldSettings = new FieldSettings();
orderedTerms = new InstantiatedTerm[0];
documentsByNumber = new InstantiatedDocument[0];
normsByFieldNameAndDocumentNumber = new HashMap();
deletedDocuments = new HashSet();
}
+
/**
* Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
*
@@ -83,7 +96,9 @@
public InstantiatedIndex(IndexReader sourceIndexReader) throws IOException {
this(sourceIndexReader, null);
}
+
+
/**
* Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
*
@@ -97,10 +112,63 @@
throw new IOException("Source index is not optimized.");
}
- Collection allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL);
initialize();
+ Collection allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL);
+
+ // load field options
+
+ Collection indexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED);
+ for (String name : indexedNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.indexed = true;
+ }
+ Collection indexedNoVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR);
+ for (String name : indexedNoVecNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.storeTermVector = false;
+ setting.indexed = true;
+ }
+ Collection indexedVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
+ for (String name : indexedVecNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.storeTermVector = true;
+ setting.indexed = true;
+ }
+ Collection payloadNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS);
+ for (String name : payloadNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.storePayloads = true;
+ }
+ Collection termVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR);
+ for (String name : termVecNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.storeTermVector = true;
+ }
+ Collection termVecOffsetNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
+ for (String name : termVecOffsetNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.storeOffsetWithTermVector = true;
+ }
+ Collection termVecPosNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
+ for (String name : termVecPosNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.storePositionWithTermVector = true;
+ }
+ Collection termVecPosOffNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
+ for (String name : termVecPosOffNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.storeOffsetWithTermVector = true;
+ setting.storePositionWithTermVector = true;
+ }
+ Collection unindexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.UNINDEXED);
+ for (String name : unindexedNames) {
+ FieldSetting setting = fieldSettings.get(name, true);
+ setting.indexed = false;
+ }
+
+
documentsByNumber = new InstantiatedDocument[sourceIndexReader.numDocs()];
// create documents
@@ -129,6 +197,8 @@
}
}
+
+
// create norms
for (String fieldName : allFieldNames) {
if (fields == null || fields.contains(fieldName)) {
Index: src/java/org/apache/lucene/store/instantiated/FieldSetting.java
===================================================================
--- src/java/org/apache/lucene/store/instantiated/FieldSetting.java (revision 0)
+++ src/java/org/apache/lucene/store/instantiated/FieldSetting.java (revision 0)
@@ -0,0 +1,61 @@
+package org.apache.lucene.store.instantiated;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * For non package access see {@link org.apache.lucene.index.IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
+ */
+class FieldSetting {
+  /** Name of the field; also the sole basis of equals and hashCode. */
+  String fieldName;
+
+  boolean storeTermVector = false;
+  boolean storeOffsetWithTermVector = false;
+  boolean storePositionWithTermVector = false;
+  boolean storePayloads = false;
+
+  boolean stored = false;
+  boolean indexed = false;
+  boolean tokenized = false;
+  boolean compressed = false;
+
+  /** Creates a setting without a name; fieldName stays null until assigned. */
+  FieldSetting() {
+  }
+
+  FieldSetting(String fieldName) {
+    this.fieldName = fieldName;
+  }
+
+  /** Settings of the same class are equal when their field names are (null-safe). */
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    final FieldSetting that = (FieldSetting) o;
+    return fieldName == null ? that.fieldName == null : fieldName.equals(that.fieldName);
+  }
+
+  /** Hash of the field name, 0 while the name is unset; consistent with equals. */
+  public int hashCode() {
+    return fieldName == null ? 0 : fieldName.hashCode();
+  }
+}
Index: src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java
===================================================================
--- src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (revision 670973)
+++ src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (working copy)
@@ -121,16 +121,11 @@
} else {
return true;
}
-
-
}
/**
* Does nothing
*/
public void close() {
-
}
-
-
}