false, in which case you must open a new
+ * IndexReader in order to see the changes. See the
+ * description of the autoCommit
+ * flag which controls when the {@link IndexWriter}
+ * actually commits changes to the index.
+ *
+ * @return always true
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ * @throws UnsupportedOperationException never thrown by this implementation, which always returns true
+ */
+  public boolean isCurrent() throws IOException {
+    // This implementation unconditionally reports the reader as current.
+    return true;
+  }
+
+  /**
+   * Exposes the index this reader operates on.
+   *
+   * @return the {@link InstantiatedIndex} held in the <code>index</code> field
+   */
+  public InstantiatedIndex getIndex() {
+    return index;
+  }
+
+ private Setnth position.
+ *
+ * @param n Get the document at the nth position
+ * @param fieldSelector ignored
+ * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ *
+ * @see org.apache.lucene.document.Fieldable
+ * @see org.apache.lucene.document.FieldSelector
+ * @see org.apache.lucene.document.SetBasedFieldSelector
+ * @see org.apache.lucene.document.LoadFirstFieldSelector
+ */
+  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+    // The selector is not consulted: the call is forwarded to the single-argument overload.
+    final Document stored = document(n);
+    return stored;
+  }
+
+  /**
+   * Looks up the document stored at position <code>n</code>.
+   *
+   * @param n the document number to fetch
+   * @return the document at position <code>n</code>, or <code>null</code> when <code>n</code> is
+   *         contained in the <code>deletedDocumentNumbers</code> field or in the set returned by
+   *         the index's <code>getDeletedDocuments()</code>
+   * @throws IOException declared in the signature; not thrown directly by the statements here
+   */
+  public Document document(int n) throws IOException {
+    // Deletions tracked in this object's deletedDocumentNumbers field are checked first.
+    if (deletedDocumentNumbers != null && deletedDocumentNumbers.contains(n)) {
+      return null;
+    }
+    // Then the deletions recorded in the index itself.
+    if (getIndex().getDeletedDocuments() != null && getIndex().getDeletedDocuments().contains(n)) {
+      return null;
+    }
+    return getIndex().getDocumentsByNumber()[n].getDocument();
+  }
+
+ /**
+ * never ever touch these values. it is the true values, unless norms have been touched.
+ */
+ public byte[] norms(String field) throws IOException {
+ byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
+ if (updatedNormsByFieldNameAndDocumentNumber != null) {
+ norms = norms.clone();
+ List
+ * [Term]-- {0..*} | {0..*} --(field)[Document]
+ * <<ordered>>
+ * |
+ * [TermDocumentInformation]
+ * +payloads
+ * +termPositions
+ * +termOffsets
+ *
+ *
+ */
+public class InstantiatedTermDocumentInformation
+ implements Serializable {
+
+ private static final long serialVersionUID = 1l;
+
+ public static final ComparatorThis is + * invalid until {@link #next()} is called for + * the first time. + */ + public int nextPosition() { + currentTermPositionIndex++; + // if you get an array out of index exception here, + // it might be due to currentDocumentInformation.getIndexFromTerm not beeing set!! + return currentDocumentInformation.getTermPositions()[currentTermPositionIndex]; + } + + private int currentTermPositionIndex; + + /** + * Moves to the next pair in the enumeration. + *
Returns true if there is such a next pair in the enumeration.
+ */
+  @Override
+  public boolean next() {
+    // Rewind the position cursor to "before the first position" ahead of moving on.
+    currentTermPositionIndex = -1;
+    final boolean advanced = super.next();
+    return advanced;
+  }
+
+ /**
+ * Skips entries to the first beyond the current whose document number is
+ * greater than or equal to <i>target</i>. <p>Returns true iff there is such
+ * an entry. <p>Behaves as if written: <pre>
+ *   boolean skipTo(int target) {
+ *     do {
+ *       if (!next())
+ *         return false;
+ *     } while (target > doc());
+ *     return true;
+ *   }
+ * </pre>
+ * Some implementations are considerably more efficient than that.
+ */
+  @Override
+  public boolean skipTo(int target) {
+    // Rewind the position cursor to "before the first position" ahead of skipping.
+    currentTermPositionIndex = -1;
+    final boolean found = super.skipTo(target);
+    return found;
+  }
+}
Index: contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java
===================================================================
--- contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (revision 0)
+++ contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (revision 0)
@@ -0,0 +1,136 @@
+package org.apache.lucene.store.instantiated;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+
+/**
+ * A {@link org.apache.lucene.index.TermDocs} navigating an {@link InstantiatedIndexReader}.
+ */
+public class InstantiatedTermDocs
+    implements TermDocs {
+
+  private final InstantiatedIndexReader reader;
+
+  /** Index into the current term's associated documents; -1 means "before the first". */
+  private int currentDocumentIndex;
+  /** Entry chosen by the last successful next()/skipTo(); not meaningful before that. */
+  protected InstantiatedTermDocumentInformation currentDocumentInformation;
+  /** Term being enumerated; next() and skipTo() return false while this is null. */
+  protected InstantiatedTerm currentTerm;
+
+  public InstantiatedTermDocs(InstantiatedIndexReader reader) {
+    this.reader = reader;
+  }
+
+  /** Positions this enumeration before the first document of <code>term</code>. */
+  public void seek(Term term) {
+    currentTerm = reader.getIndex().findTerm(term);
+    currentDocumentIndex = -1;
+  }
+
+  /** Same as {@link #seek(Term)}, using the term the given enumeration currently points at. */
+  public void seek(org.apache.lucene.index.TermEnum termEnum) {
+    seek(termEnum.term());
+  }
+
+  /** Document number of the current entry; call only after next()/skipTo() returned true. */
+  public int doc() {
+    return currentDocumentInformation.getDocument().getDocumentNumber();
+  }
+
+  /** Frequency of the term in the current document, i.e. the number of recorded positions. */
+  public int freq() {
+    return currentDocumentInformation.getTermPositions().length;
+  }
+
+  /**
+   * Moves to the next document of the current term that the reader does not report as deleted.
+   * Deleted entries are skipped in a loop, not by recursion, so a long run of them cannot
+   * exhaust the call stack.
+   * @return true if such a document exists; false if no term is selected or the list is exhausted
+   */
+  public boolean next() {
+    if (currentTerm == null) {
+      return false;
+    }
+    final InstantiatedTermDocumentInformation[] postings = currentTerm.getAssociatedDocuments();
+    while (++currentDocumentIndex < postings.length) {
+      currentDocumentInformation = postings[currentDocumentIndex];
+      if (!isCurrentDocumentDeleted()) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Fills the arrays with successive doc()/freq() pairs and returns how many were written. */
+  public int read(int[] docs, int[] freqs) {
+    int count = 0;
+    while (count < docs.length && next()) {
+      docs[count] = doc();
+      freqs[count] = freq();
+      count++;
+    }
+    return count;
+  }
+
+  /**
+   * Skips entries to the first beyond the current whose document number is
+   * greater than or equal to <i>target</i>. <p>Returns true if there is such
+   * an entry. <p>Behaves as if written: <pre>
+   *   boolean skipTo(int target) {
+   *     do {
+   *       if (!next())
+   *         return false;
+   *     } while (target > doc());
+   *     return true;
+   *   }
+   * </pre>
+   * This implementation is considerably more efficient than that. When no entry qualifies the
+   * enumeration is left exhausted, so a following next() returns false as in the loop above.
+   */
+  public boolean skipTo(int target) {
+    if (currentTerm == null) {
+      return false;
+    }
+    // NOTE(review): this compares a list index with a document number -- confirm intent.
+    if (currentDocumentIndex >= target) {
+      return next();
+    }
+    int startOffset = currentDocumentIndex >= 0 ? currentDocumentIndex : 0;
+    int pos = currentTerm.seekCeilingDocumentInformationIndex(target, startOffset);
+    if (pos == -1) {
+      // Nothing at or after target: move past the end so the enumeration stays exhausted.
+      currentDocumentIndex = currentTerm.getAssociatedDocuments().length;
+      return false;
+    }
+    currentDocumentIndex = pos;
+    currentDocumentInformation = currentTerm.getAssociatedDocuments()[pos];
+    return isCurrentDocumentDeleted() ? next() : true;
+  }
+
+  /** True when the reader has deletions and reports the current entry's document as deleted. */
+  private boolean isCurrentDocumentDeleted() {
+    return reader.hasDeletions() && reader.isDeleted(currentDocumentInformation.getDocument().getDocumentNumber());
+  }
+
+  /** Does nothing. */
+  public void close() {
+  }
+}
Index: contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
===================================================================
--- contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (revision 0)
+++ contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (revision 0)
@@ -0,0 +1,681 @@
+package org.apache.lucene.store.instantiated;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.StringReader;
+import java.util.*;
+
+/**
+ * This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism.
+ *
+ * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader} navigates
+ * the same instances in memory as this writer updates, so searchers active while
+ * you are committing are bound to throw exceptions.
+ *
+ * Consider using InstantiatedIndex as if it was immutable.
+ *
+ * @see org.apache.lucene.index.IndexWriter
+ */
+public class InstantiatedIndexWriter {
+
+ private PrintStream infoStream = null;
+
+ private int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
+
+ private final InstantiatedIndex index;
+ private final Analyzer analyzer;
+
+ private Similarity similarity = Similarity.getDefault(); // how to normalize;
+
+ private transient Set+ * ------------> Report sum by Prefix (MAddDocs) and Round (8 about 8 out of 160153) + * Operation round mrg buf cmpnd runCnt recsPerRun rec/s elapsedSec avgUsedMem avgTotalMem + * MAddDocs_20000 0 10 10 true 1 20000 81,4 245,68 200 325 152 268 156 928 + * MAddDocs_20000 - 1 1000 10 true - - 1 - - 20000 - - 494,1 - - 40,47 - 247 119 072 - 347 025 408 + * MAddDocs_20000 2 10 100 true 1 20000 104,8 190,81 233 895 552 363 720 704 + * MAddDocs_20000 - 3 2000 100 true - - 1 - - 20000 - - 527,2 - - 37,94 - 266 136 448 - 378 273 792 + * MAddDocs_20000 4 10 10 false 1 20000 103,2 193,75 222 089 792 378 273 792 + * MAddDocs_20000 - 5 3000 10 false - - 1 - - 20000 - - 545,2 - - 36,69 - 237 917 152 - 378 273 792 + * MAddDocs_20000 6 10 100 false 1 20000 102,7 194,67 237 018 976 378 273 792 + * MAddDocs_20000 - 7 4000 100 false - - 1 - - 20000 - - 535,8 - - 37,33 - 309 680 640 - 501 968 896 + *+ * + * @see org.apache.lucene.index.IndexWriter#setMergeFactor(int) + */ + public void setMergeFactor(int mergeFactor) { + this.mergeFactor = mergeFactor; + } + + /** + * @see org.apache.lucene.index.IndexWriter#getMergeFactor() + */ + public int getMergeFactor() { + return mergeFactor; + } + + + /** + * If non-null, information about merges and a message when + * maxFieldLength is reached will be printed to this. + */ + public void setInfoStream(PrintStream infoStream) { + this.infoStream = infoStream; + } + + + public void abort() throws IOException { + // what not + } + + + public void addIndexes(IndexReader[] readers) { + throw new RuntimeException("Not implemented"); + } + + + public PrintStream getInfoStream() { + return infoStream; + } + + + /** + * Flushes all changes to an index and closes all associated files. + */ + public void close() throws IOException { + commit(); + } + + /** + * Returns the number of documents currently in this index. + */ + public int docCount() { + // todo: not certain. 
see http://www.nabble.com/IndexWriter.docCount-tf3128882.html#a8669483 + return index.getDocumentsByNumber().length /* - index.getDeletedDocuments().size() */ + unflushedDocuments.size(); + } + + /** + * Locks the index and commits the buffered documents. + */ + public void commit() throws IOException { + + // todo write lock, unless held by caller + + boolean orderedTermsDirty = false; + Set
WARNING: This contrib is experimental and the APIs may change without warning.
++ Represented as a coupled graph of class instances, this + all-in-memory index store implementation delivers search + results up to 100 times faster than the file-centric RAMDirectory + at the cost of greater RAM consumption. +
+ ++ Just like the default store implementation, InstantiatedIndex + comes with an IndexReader and an IndexWriter. The latter shares + many method signatures with the file-centric IndexWriter. +
+ ++ It is also possible to load the content of another index + by passing an IndexReader to the InstantiatedIndex constructor. +
+ ++ At a few thousand ~160 characters long documents + InstantiatedIndex outperforms RAMDirectory some 50x, + 15x at 100 documents of 2000 characters length, + and is linear to RAMDirectory at 10,000 documents of 2000 characters length. +
+ +Mileage may vary depending on term saturation.
+ ++ Populated with a single document InstantiatedIndex is almost, but not quite, as fast as MemoryIndex. +
+ ++ It takes more or less the same time to populate an InstantiatedIndex + as it takes to populate a RAMDirectory. Hardly any effort has been put + into optimizing the InstantiatedIndexWriter; only minimizing the amount + of time needed to write-lock the index has been considered. +
+ ++ Could replace any small index that would benefit from faster response times: + a spell check a priori index, + the index of new documents exposed to user search agent queries, + an index used to compile classifiers in machine learning environments, etc. +
+ +
+