Index: src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java =================================================================== --- src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (revision 670973) +++ src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (working copy) @@ -47,7 +47,7 @@ // create dir data IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true); - for (int i = 0; i < 5; i++) { + for (int i = 0; i < 20; i++) { Document document = new Document(); assembleDocument(document, i); indexWriter.addDocument(document); @@ -59,9 +59,10 @@ InstantiatedIndex ii = new InstantiatedIndex(ir); ir.close(); - testEquals(dir, ii); + testEqualBehaviour(dir, ii); } + public void testInstantiatedIndexWriter() throws Exception { @@ -86,7 +87,7 @@ } instantiatedIndexWriter.close(); - testEquals(dir, ii); + testEqualBehaviour(dir, ii); testTermDocs(dir, ii); @@ -186,6 +187,25 @@ * @param testIndex the index that is supposed to equals the apriori index. 
* @throws Exception */ + protected void testEqualBehaviour(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception { + + testEquals(aprioriIndex, testIndex); + + // delete a few documents + IndexReader ir = IndexReader.open(aprioriIndex); + ir.deleteDocument(3); + ir.deleteDocument(8); + ir.close(); + + ir = testIndex.indexReaderFactory(); + ir.deleteDocument(3); + ir.deleteDocument(8); + ir.close(); + + // make sure they still equal + testEquals(aprioriIndex, testIndex); + } + protected void testEquals(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception { IndexReader aprioriReader = IndexReader.open(aprioriIndex); @@ -193,6 +213,17 @@ assertEquals(aprioriReader.numDocs(), testReader.numDocs()); + // assert field options + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED), testReader.getFieldNames(IndexReader.FieldOption.INDEXED)); + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)); + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)); + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), testReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS)); + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR)); + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)); + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)); + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), 
testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)); + assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.UNINDEXED), testReader.getFieldNames(IndexReader.FieldOption.UNINDEXED)); + for (Object field : aprioriReader.getFieldNames(IndexReader.FieldOption.ALL)) { // test norms as used by normal use Index: src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java =================================================================== --- src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (revision 670973) +++ src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (working copy) @@ -16,22 +16,37 @@ * limitations under the License. */ +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.index.*; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermFreqVector; +import org.apache.lucene.index.TermPositions; +import org.apache.lucene.index.TermVectorMapper; import org.apache.lucene.store.Directory; -import java.io.IOException; -import java.util.*; - /** - * An InstantiatedIndexReader is not a snapshot in time, - * it is completely in sync with the latest commit to the store! - * + * An InstantiatedIndexReader is not a snapshot in time, it is completely in + * sync with the latest commit to the store! + * * Consider using InstantiatedIndex as if it was immutable. 
*/ -public class InstantiatedIndexReader - extends IndexReader { +public class InstantiatedIndexReader extends IndexReader { private final InstantiatedIndex index; @@ -47,40 +62,40 @@ return true; } - /** - * An InstantiatedIndexReader is not a snapshot in time, - * it is completely in sync with the latest commit to the store! - * - * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index. + * An InstantiatedIndexReader is not a snapshot in time, it is completely in + * sync with the latest commit to the store! + * + * @return output from {@link InstantiatedIndex#getVersion()} in associated + * instantiated index. */ public long getVersion() { return index.getVersion(); } - public Directory directory() { throw new UnsupportedOperationException(); } - /** * An InstantiatedIndexReader is always current! - * - * Check whether this IndexReader is still using the - * current (i.e., most recently committed) version of the - * index. If a writer has committed any changes to the - * index since this reader was opened, this will return - * false, in which case you must open a new - * IndexReader in order to see the changes. See the - * description of the autoCommit - * flag which controls when the {@link IndexWriter} - * actually commits changes to the index. - * + * + * Check whether this IndexReader is still using the current (i.e., most + * recently committed) version of the index. If a writer has committed any + * changes to the index since this reader was opened, this will return + * false, in which case you must open a new IndexReader in + * order to see the changes. See the description of the autoCommit flag + * which controls when the {@link IndexWriter} actually commits changes to the + * index. 
+ * * @return always true - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * @throws UnsupportedOperationException unless overridden in subclass + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * @throws UnsupportedOperationException + * unless overridden in subclass */ public boolean isCurrent() throws IOException { return true; @@ -92,7 +107,7 @@ private Set deletedDocuments = new HashSet(); private Set deletedDocumentNumbers = new HashSet(); - private Map> updatedNormsByFieldNameAndDocumentNumber = null; + private Map> updatedNormsByFieldNameAndDocumentNumber = null; private class NormUpdate { private int doc; @@ -140,7 +155,7 @@ // 1. update norms if (updatedNormsByFieldNameAndDocumentNumber != null) { - for (Map.Entry> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) { + for (Map.Entry> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) { byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey()); for (NormUpdate normUpdate : e.getValue()) { norms[normUpdate.doc] = normUpdate.value; @@ -170,25 +185,56 @@ // ignored } - public Collection getFieldNames(FieldOption fldOption) { - if (fldOption != FieldOption.ALL) { - throw new IllegalArgumentException("Only FieldOption.ALL implemented."); // todo + public Collection getFieldNames(FieldOption fieldOption) { + Set fieldSet = new HashSet(); + for (FieldSetting fi : index.fieldSettings.values()) { + if (fieldOption == IndexReader.FieldOption.ALL) { + fieldSet.add(fi.fieldName); + } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) { + fieldSet.add(fi.fieldName); + } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) { + fieldSet.add(fi.fieldName); + } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) { + fieldSet.add(fi.fieldName); + } else if (fi.indexed && 
fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) { + fieldSet.add(fi.fieldName); + } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false + && fieldOption == IndexReader.FieldOption.TERMVECTOR) { + fieldSet.add(fi.fieldName); + } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) { + fieldSet.add(fi.fieldName); + } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false + && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) { + fieldSet.add(fi.fieldName); + } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false + && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) { + fieldSet.add(fi.fieldName); + } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) + && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) { + fieldSet.add(fi.fieldName); + } } - return new ArrayList(getIndex().getTermsByFieldAndText().keySet()); + return fieldSet; } - /** - * This implementation ignores the field selector! All fields are always returned - * - * Get the {@link org.apache.lucene.document.Document} at the nth position. - * - * @param n Get the document at the nth position - * @param fieldSelector ignored - * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * + * This implementation ignores the field selector! All fields are always + * returned + * + * Get the {@link org.apache.lucene.document.Document} at the nth + * position. 
+ * + * @param n + * Get the document at the nth position + * @param fieldSelector + * ignored + * @return The stored fields of the + * {@link org.apache.lucene.document.Document} at the nth position + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * * @see org.apache.lucene.document.Fieldable * @see org.apache.lucene.document.FieldSelector * @see org.apache.lucene.document.SetBasedFieldSelector @@ -199,18 +245,17 @@ } public Document document(int n) throws IOException { - if ((deletedDocumentNumbers != null - && deletedDocumentNumbers.contains(n)) - || - (getIndex().getDeletedDocuments() != null - && getIndex().getDeletedDocuments().contains(n))) { - return null; - } + if (isDeleted(n)) return null; + //if ((deletedDocumentNumbers != null && isDeleted(n)//deletedDocumentNumbers.contains(n)) + // || (getIndex().getDeletedDocuments() != null && getIndex().getDeletedDocuments().contains(n))) { + // return null; + //} return getIndex().getDocumentsByNumber()[n].getDocument(); } /** - * never ever touch these values. it is the true values, unless norms have been touched. + * never ever touch these values. it is the true values, unless norms have + * been touched. 
*/ public byte[] norms(String field) throws IOException { byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field); @@ -233,7 +278,8 @@ protected void doSetNorm(int doc, String field, byte value) throws IOException { if (updatedNormsByFieldNameAndDocumentNumber == null) { - updatedNormsByFieldNameAndDocumentNumber = new HashMap>(getIndex().getNormsByFieldNameAndDocumentNumber().size()); + updatedNormsByFieldNameAndDocumentNumber = new HashMap>(getIndex().getNormsByFieldNameAndDocumentNumber() + .size()); } List list = updatedNormsByFieldNameAndDocumentNumber.get(field); if (list == null) { @@ -252,7 +298,6 @@ } } - public TermEnum terms() throws IOException { return new InstantiatedTermEnum(this); } @@ -260,11 +305,11 @@ public TermEnum terms(Term t) throws IOException { InstantiatedTerm it = getIndex().findTerm(t); if (it != null) { - return new InstantiatedTermEnum(this, it.getTermIndex()); + return new InstantiatedTermEnum(this, it.getTermIndex()); } else { int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator); if (startPos < 0) { - startPos = -1 -startPos; + startPos = -1 - startPos; } return new InstantiatedTermEnum(this, startPos); } @@ -293,19 +338,16 @@ public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException { InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; - if (doc.getVectorSpace() == null - || doc.getVectorSpace().get(field) == null) { + if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) { return null; } else { return new InstantiatedTermPositionVector(doc, field); } } - public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException { InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; - if (doc.getVectorSpace() != null - && doc.getVectorSpace().get(field) == null) { + if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) != null) { List 
tv = doc.getVectorSpace().get(field); mapper.setExpectations(field, tv.size(), true, true); for (InstantiatedTermDocumentInformation tdi : tv) { @@ -316,7 +358,7 @@ public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException { InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; - for (Map.Entry> e : doc.getVectorSpace().entrySet()) { + for (Map.Entry> e : doc.getVectorSpace().entrySet()) { mapper.setExpectations(e.getKey(), e.getValue().size(), true, true); for (InstantiatedTermDocumentInformation tdi : e.getValue()) { mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions()); Index: src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java =================================================================== --- src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java (revision 670973) +++ src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java (working copy) @@ -61,7 +61,7 @@ * Returns the current Term in the enumeration. */ public Term term() { - return /*term == null ? null :*/ term.getTerm(); + return term == null ? null : term.getTerm(); } /** Index: src/java/org/apache/lucene/store/instantiated/FieldSettings.java =================================================================== --- src/java/org/apache/lucene/store/instantiated/FieldSettings.java (revision 0) +++ src/java/org/apache/lucene/store/instantiated/FieldSettings.java (revision 0) @@ -0,0 +1,95 @@ +package org.apache.lucene.store.instantiated; + +import java.util.HashMap; +import java.util.Map; +import java.util.Collection; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Holds one {@link FieldSetting} per field name and merges new settings into it. + */ +class FieldSettings { + + + FieldSettings() { + } + + private Map fieldSettings = new HashMap(); + + /** + * Switches on, in the setting stored under the same field name (created on demand), every flag set on the given setting; flags are never switched off. Returns the stored setting. + */ + synchronized FieldSetting merge(FieldSetting fieldSetting) { + FieldSetting setting = fieldSettings.get(fieldSetting.fieldName); + + if (setting == null) { + setting = new FieldSetting(fieldSetting.fieldName); + fieldSettings.put(fieldSetting.fieldName, setting); + } + + if (fieldSetting.stored) { + setting.stored = true; + } + if (fieldSetting.compressed) { + setting.compressed = true; + } + + if (fieldSetting.indexed) { + setting.indexed = true; + } + if (fieldSetting.tokenized) { + setting.tokenized = true; + } + + if (fieldSetting.storeTermVector) { + setting.storeTermVector = true; + } + if (fieldSetting.storeOffsetWithTermVector) { + setting.storeOffsetWithTermVector = true; + } + if (fieldSetting.storePositionWithTermVector) { + setting.storePositionWithTermVector = true; + } + + if (fieldSetting.storePayloads) { + setting.storePayloads = true; + } + + return setting; + + } + + FieldSetting get(String name) { + return fieldSettings.get(name); + } + + FieldSetting get(String name, boolean create) { + FieldSetting fieldSetting = fieldSettings.get(name); + if (create && fieldSetting == null) { + fieldSetting = new FieldSetting(name); + fieldSettings.put(name, 
fieldSetting); + } + return fieldSetting; + } + + Collection values() { + return fieldSettings.values(); + } + +} Index: src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java =================================================================== --- src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (revision 670973) +++ src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (working copy) @@ -16,6 +16,22 @@ * limitations under the License. */ +import java.io.IOException; +import java.io.PrintStream; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; @@ -28,11 +44,6 @@ import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; -import java.io.IOException; -import java.io.PrintStream; -import java.io.StringReader; -import java.util.*; - /** * This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism. 
* @@ -161,6 +172,11 @@ boolean orderedTermsDirty = false; Set dirtyTerms = new HashSet(1000); + + Map fieldSettingsByFieldName = new HashMap(); + for (String fieldName : fieldNameBuffer) { + fieldSettingsByFieldName.put(fieldName, new FieldSetting(fieldName)); + } InstantiatedDocument[] documentsByNumber = new InstantiatedDocument[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()]; System.arraycopy(index.getDocumentsByNumber(), 0, documentsByNumber, 0, index.getDocumentsByNumber().length); @@ -215,7 +231,7 @@ } termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size(); - if (eFieldTermDocInfoFactoriesByTermText.getKey().isIndexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) { + if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) { float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost; norm *= document.getDocument().getBoost(); norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength); @@ -340,6 +356,7 @@ } } + fieldSettingsByFieldName.putAll(documentFieldSettingsByFieldName); } // order document informations in dirty terms @@ -358,6 +375,9 @@ index.setDocumentsByNumber(documentsByNumber); index.setOrderedTerms(orderedTerms.toArray(new InstantiatedTerm[orderedTerms.size()])); + for (FieldSetting fieldSetting : fieldSettingsByFieldName.values()) { + index.fieldSettings.merge(fieldSetting); + } // set term index if (orderedTermsDirty) { // todo optimize, only update from start position @@ -434,45 +454,46 @@ Map fieldSettingsByFieldName = new HashMap(); for (Field field : (List) document.getDocument().getFields()) { - FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name()); - if (fieldSettings == null) { - fieldSettings = new FieldSetting(); - fieldSettings.fieldName = field.name().intern(); - 
fieldSettingsByFieldName.put(fieldSettings.fieldName, fieldSettings); - fieldNameBuffer.add(fieldSettings.fieldName); + FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name()); + if (fieldSetting == null) { + fieldSetting = new FieldSetting(); + fieldSetting.fieldName = field.name().intern(); + fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting); + fieldNameBuffer.add(fieldSetting.fieldName); } // todo: fixme: multiple fields with the same name does not mean field boost += more boost. - fieldSettings.boost *= field.getBoost(); + fieldSetting.boost *= field.getBoost(); //fieldSettings.dimensions++; + // once fieldSettings, always fieldSettings. - if (field.getOmitNorms() != fieldSettings.omitNorms) { - fieldSettings.omitNorms = true; + if (field.getOmitNorms()) { + fieldSetting.omitNorms = true; } - if (field.isIndexed() != fieldSettings.isIndexed) { - fieldSettings.isIndexed = true; + if (field.isIndexed() ) { + fieldSetting.indexed = true; } - if (field.isTokenized() != fieldSettings.isTokenized) { - fieldSettings.isTokenized = true; + if (field.isTokenized()) { + fieldSetting.tokenized = true; } - if (field.isCompressed() != fieldSettings.isCompressed) { - fieldSettings.isCompressed = true; + if (field.isCompressed()) { + fieldSetting.compressed = true; } - if (field.isStored() != fieldSettings.isStored) { - fieldSettings.isStored = true; + if (field.isStored()) { + fieldSetting.stored = true; } - if (field.isBinary() != fieldSettings.isBinary) { - fieldSettings.isBinary = true; + if (field.isBinary()) { + fieldSetting.isBinary = true; } - if (field.isTermVectorStored() != fieldSettings.storeTermVector) { - fieldSettings.storeTermVector = true; + if (field.isTermVectorStored()) { + fieldSetting.storeTermVector = true; } - if (field.isStorePositionWithTermVector() != fieldSettings.storePositionWithTermVector) { - fieldSettings.storePositionWithTermVector = true; + if (field.isStorePositionWithTermVector()) { + 
fieldSetting.storePositionWithTermVector = true; } - if (field.isStoreOffsetWithTermVector() != fieldSettings.storeOffsetWithTermVector) { - fieldSettings.storeOffsetWithTermVector = true; + if (field.isStoreOffsetWithTermVector()) { + fieldSetting.storeOffsetWithTermVector = true; } } @@ -483,7 +504,7 @@ Field field = it.next(); - FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name()); + FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name()); if (field.isIndexed()) { @@ -505,15 +526,15 @@ next.setTermText(next.termText().intern()); // todo: not sure this needs to be interned? tokens.add(next); // the vector will be built on commit. next = tokenStream.next(); - fieldSettings.fieldLength++; - if (fieldSettings.fieldLength > maxFieldLength) { + fieldSetting.fieldLength++; + if (fieldSetting.fieldLength > maxFieldLength) { break; } } } else { // untokenized tokens.add(new Token(field.stringValue().intern(), 0, field.stringValue().length(), "untokenized")); - fieldSettings.fieldLength++; + fieldSetting.fieldLength++; } } @@ -528,7 +549,7 @@ // build term vector, term positions and term offsets for (Map.Entry> eField_Tokens : tokensByField.entrySet()) { - FieldSetting fieldSettings = fieldSettingsByFieldName.get(eField_Tokens.getKey().name()); + FieldSetting fieldSetting = fieldSettingsByFieldName.get(eField_Tokens.getKey().name()); Map termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name())); if (termDocumentInformationFactoryByTermText == null) { @@ -539,9 +560,9 @@ int lastOffset = 0; // for each new field, move positions a bunch. - if (fieldSettings.position > 0) { + if (fieldSetting.position > 0) { // todo what if no analyzer set, multiple fields with same name and index without tokenization? 
- fieldSettings.position += analyzer.getPositionIncrementGap(fieldSettings.fieldName); + fieldSetting.position += analyzer.getPositionIncrementGap(fieldSetting.fieldName); } for (Token token : eField_Tokens.getValue()) { @@ -553,26 +574,27 @@ } //termDocumentInformationFactory.termFrequency++; - fieldSettings.position += (token.getPositionIncrement() - 1); - termDocumentInformationFactory.termPositions.add(fieldSettings.position++); + fieldSetting.position += (token.getPositionIncrement() - 1); + termDocumentInformationFactory.termPositions.add(fieldSetting.position++); if (token.getPayload() != null && token.getPayload().length() > 0) { termDocumentInformationFactory.payloads.add(token.getPayload().toByteArray()); + fieldSetting.storePayloads = true; } else { termDocumentInformationFactory.payloads.add(null); } if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) { - termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSettings.offset + token.startOffset(), fieldSettings.offset + token.endOffset())); - lastOffset = fieldSettings.offset + token.endOffset(); + termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSetting.offset + token.startOffset(), fieldSetting.offset + token.endOffset())); + lastOffset = fieldSetting.offset + token.endOffset(); } } if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) { - fieldSettings.offset = lastOffset + 1; + fieldSetting.offset = lastOffset + 1; } } @@ -631,51 +653,30 @@ return analyzer; } + private class TermDocumentInformationFactory { + private LinkedList payloads = new LinkedList(); + private LinkedList termPositions = new LinkedList(); + private LinkedList termOffsets = new LinkedList(); + } - private class FieldSetting { - private String fieldName; - private float boost = 1; - //private int dimensions = 0; // this is futuristic - private int position = 0; - private int offset; - private int fieldLength = 0; + static class FieldSetting extends 
org.apache.lucene.store.instantiated.FieldSetting { - private boolean storeTermVector = false; - private boolean storeOffsetWithTermVector = false; - private boolean storePositionWithTermVector = false; - private boolean omitNorms = false; - private boolean isTokenized = false; + float boost = 1; + int position = 0; + int offset; + int fieldLength = 0; - private boolean isStored = false; - private boolean isIndexed = false; - private boolean isBinary = false; - private boolean isCompressed = false; + boolean omitNorms = false; + boolean isBinary = false; - //private float norm; - //private byte encodedNorm; - - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - final FieldSetting that = (FieldSetting) o; - - return fieldName.equals(that.fieldName); - + private FieldSetting() { } - public int hashCode() { - return fieldName.hashCode(); + private FieldSetting(String fieldName) { + super(fieldName); } } - private class TermDocumentInformationFactory { - private LinkedList payloads = new LinkedList(); - private LinkedList termPositions = new LinkedList(); - private LinkedList termOffsets = new LinkedList(); - } - - } Index: src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java =================================================================== --- src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (revision 670973) +++ src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (working copy) @@ -16,15 +16,25 @@ * limitations under the License. 
*/ +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermPositionVector; +import org.apache.lucene.index.TermPositions; -import java.io.IOException; -import java.io.Serializable; -import java.util.*; - /** * Represented as a coupled graph of class instances, this * all-in-memory index store implementation delivers search @@ -57,6 +67,7 @@ private Map normsByFieldNameAndDocumentNumber; + FieldSettings fieldSettings; /** * Creates an empty instantiated index for you to fill with data using an {@link org.apache.lucene.store.instantiated.InstantiatedIndexWriter}. @@ -68,12 +79,14 @@ void initialize() { // todo: clear index without loosing memory (uncouple stuff) termsByFieldAndText = new HashMap>(); + fieldSettings = new FieldSettings(); orderedTerms = new InstantiatedTerm[0]; documentsByNumber = new InstantiatedDocument[0]; normsByFieldNameAndDocumentNumber = new HashMap(); deletedDocuments = new HashSet(); } + /** * Creates a new instantiated index that looks just like the index in a specific state as represented by a reader. * @@ -83,7 +96,9 @@ public InstantiatedIndex(IndexReader sourceIndexReader) throws IOException { this(sourceIndexReader, null); } + + /** * Creates a new instantiated index that looks just like the index in a specific state as represented by a reader. 
* @@ -97,10 +112,63 @@ throw new IOException("Source index is not optimized."); } - Collection allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL); initialize(); + Collection allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL); + + // load field options + + Collection indexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED); + for (String name : indexedNames) { + FieldSetting setting = fieldSettings.get(name, true); + setting.indexed = true; + } + Collection indexedNoVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR); + for (String name : indexedNoVecNames) { + FieldSetting setting = fieldSettings.get(name, true); + setting.storeTermVector = false; + setting.indexed = true; + } + Collection indexedVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); + for (String name : indexedVecNames) { + FieldSetting setting = fieldSettings.get(name, true); + setting.storeTermVector = true; + setting.indexed = true; + } + Collection payloadNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS); + for (String name : payloadNames) { + FieldSetting setting = fieldSettings.get(name, true); + setting.storePayloads = true; + } + Collection termVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR); + for (String name : termVecNames) { + FieldSetting setting = fieldSettings.get(name, true); + setting.storeTermVector = true; + } + Collection termVecOffsetNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET); + for (String name : termVecOffsetNames) { + FieldSetting setting = fieldSettings.get(name, true); + setting.storeOffsetWithTermVector = true; + } + Collection termVecPosNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION); + for (String name : termVecPosNames) { + FieldSetting setting = fieldSettings.get(name, 
true); + setting.storePositionWithTermVector = true; + } + Collection termVecPosOffNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET); + for (String name : termVecPosOffNames) { + FieldSetting setting = fieldSettings.get(name, true); + setting.storeOffsetWithTermVector = true; + setting.storePositionWithTermVector = true; + } + Collection unindexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.UNINDEXED); + for (String name : unindexedNames) { + FieldSetting setting = fieldSettings.get(name, true); + setting.indexed = false; + } + + documentsByNumber = new InstantiatedDocument[sourceIndexReader.numDocs()]; // create documents @@ -129,6 +197,8 @@ } } + + // create norms for (String fieldName : allFieldNames) { if (fields == null || fields.contains(fieldName)) { Index: src/java/org/apache/lucene/store/instantiated/FieldSetting.java =================================================================== --- src/java/org/apache/lucene/store/instantiated/FieldSetting.java (revision 0) +++ src/java/org/apache/lucene/store/instantiated/FieldSetting.java (revision 0) @@ -0,0 +1,61 @@ +package org.apache.lucene.store.instantiated; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * For non package access see {@link org.apache.lucene.index.IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)} + */ +class FieldSetting { + String fieldName; + + boolean storeTermVector = false; + boolean storeOffsetWithTermVector = false; + boolean storePositionWithTermVector = false; + boolean storePayloads = false; + + boolean stored = false; + boolean indexed = false; + boolean tokenized = false; + boolean compressed = false; + + FieldSetting() { + } + + + FieldSetting(String fieldName) { + this.fieldName = fieldName; + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + final FieldSetting that = (FieldSetting) o; + + return fieldName.equals(that.fieldName); + + } + + public int hashCode() { + return fieldName.hashCode(); + } + + +} Index: src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java =================================================================== --- src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (revision 670973) +++ src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (working copy) @@ -121,16 +121,11 @@ } else { return true; } - - } /** * Does nothing */ public void close() { - } - - }