Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 918874)
+++ CHANGES.txt (working copy)
@@ -76,6 +76,15 @@
use by external code. In addition it offers a matchExtension method which
callers can use to query whether a certain file matches a certain extension.
(Shai Erera via Mike McCandless)
+
+* LUCENE-2294: IndexWriter constructors have been deprecated in favor of a
+ single ctor which accepts IndexWriterConfig and a Directory. You can set all
+ the parameters related to IndexWriter on IndexWriterConfig. The various
+ setter/getter methods on IndexWriter were deprecated as well; call
+ writer.getConfig().getXYZ() to query a parameter XYZ.
+ The setters/getters related to MergePolicy were also deprecated;
+ interact with the MergePolicy directly instead.
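+
+  For example (a minimal sketch; dir and analyzer are assumed to exist):
+
+    // before (deprecated):
+    IndexWriter writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.UNLIMITED);
+
+    // after:
+    IndexWriter writer = new IndexWriter(dir,
+        new IndexWriterConfig(Version.LUCENE_31).setAnalyzer(analyzer)
+            .setOpenMode(OpenMode.CREATE));
+    int maxDocs = writer.getConfig().getMaxBufferedDocs();         // query a setting
+    ((LogMergePolicy) writer.getMergePolicy()).setMergeFactor(20); // tune the MergePolicy directly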
+ (Shai Erera via ?)
Bug fixes
Index: src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 918874)
+++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy)
@@ -182,7 +182,7 @@
abstract DocConsumer getChain(DocumentsWriter documentsWriter);
}
- static final IndexingChain DefaultIndexingChain = new IndexingChain() {
+ static final IndexingChain defaultIndexingChain = new IndexingChain() {
@Override
DocConsumer getChain(DocumentsWriter documentsWriter) {
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 918874)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -19,7 +19,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
@@ -29,6 +29,7 @@
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.ThreadInterruptedException;
+import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Closeable;
@@ -179,10 +180,11 @@
/**
* Default value for the write lock timeout (1,000).
* @see #setDefaultWriteLockTimeout
+ * @deprecated use {@link IndexWriterConfig#WRITE_LOCK_TIMEOUT} instead
*/
- public static long WRITE_LOCK_TIMEOUT = 1000;
+ public static long WRITE_LOCK_TIMEOUT = IndexWriterConfig.WRITE_LOCK_TIMEOUT;
- private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
+ private long writeLockTimeout;
/**
* Name of the write lock in the index.
@@ -191,36 +193,43 @@
/**
* Value to denote a flush trigger is disabled
+ * @deprecated use {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} instead
*/
- public final static int DISABLE_AUTO_FLUSH = -1;
+ public final static int DISABLE_AUTO_FLUSH = IndexWriterConfig.DISABLE_AUTO_FLUSH;
/**
* Disabled by default (because IndexWriter flushes by RAM usage
* by default). Change using {@link #setMaxBufferedDocs(int)}.
+ * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DOCS} instead.
*/
- public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
+ public final static int DEFAULT_MAX_BUFFERED_DOCS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
/**
* Default value is 16 MB (which means flush when buffered
* docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
+ * @deprecated use {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} instead.
*/
- public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
+ public final static double DEFAULT_RAM_BUFFER_SIZE_MB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
/**
* Disabled by default (because IndexWriter flushes by RAM usage
* by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
+ * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DELETE_TERMS} instead
*/
- public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
+ public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
/**
* Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
+ *
+ * @deprecated see {@link IndexWriterConfig}
*/
public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;
/**
* Default value is 128. Change using {@link #setTermIndexInterval(int)}.
+ * @deprecated use {@link IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL} instead.
*/
- public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
+ public final static int DEFAULT_TERM_INDEX_INTERVAL = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
/**
* Absolute hard maximum length for a term. If a term
@@ -244,10 +253,11 @@
private int messageID = -1;
volatile private boolean hitOOM;
- private Directory directory; // where this index resides
- private Analyzer analyzer; // how to analyze text
+ private final Directory directory; // where this index resides
+ private final Analyzer analyzer; // how to analyze text
- private Similarity similarity = Similarity.getDefault(); // how to normalize
+ // TODO (4.0): this should be made final once the setter is out
+ private /*final*/Similarity similarity = Similarity.getDefault(); // how to normalize
private volatile long changeCount; // increments every time a change is completed
private long lastCommitChangeCount; // last changeCount that was committed
@@ -270,7 +280,8 @@
private Lock writeLock;
- private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+ // TODO (4.0): this should be made final once the setter is out
+ private /*final*/int termIndexInterval;
private boolean closed;
private boolean closing;
@@ -280,7 +291,8 @@
private HashSet
* This defaults to the current value of {@link Similarity#getDefault()}.
+ * @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
*/
public Similarity getSimilarity() {
ensureOpen();
- return this.similarity;
+ return similarity;
}
/** Expert: Set the interval between indexed terms. Large values cause less
@@ -827,15 +852,20 @@
* must be scanned for each random term access.
*
* @see #DEFAULT_TERM_INDEX_INTERVAL
+ * @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
*/
public void setTermIndexInterval(int interval) {
ensureOpen();
this.termIndexInterval = interval;
+ // Required so config.getTermIndexInterval returns the right value. But this will
+ // go away together with the method in 4.0.
+ config.setTermIndexInterval(interval);
}
/** Expert: Return the interval between indexed terms.
*
* @see #setTermIndexInterval(int)
+ * @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
*/
public int getTermIndexInterval() {
// We pass false because this method is called by SegmentMerger while we are in the process of closing
@@ -864,10 +894,13 @@
* if it does not exist and create is
* false or if there is any other low-level
* IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
*/
public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(d, a, create, null, mfl.getLimit(), null, null);
+ this(d, new IndexWriterConfig(Version.LUCENE_31).setAnalyzer(a).setOpenMode(
+ create ? OpenMode.CREATE : OpenMode.APPEND).setMaxFieldLength(
+ mfl.getLimit()));
}
/**
@@ -887,10 +920,12 @@
* @throws IOException if the directory cannot be
* read/written to or if there is any other low-level
* IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
*/
public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(d, a, null, mfl.getLimit(), null, null);
+ this(d, new IndexWriterConfig(Version.LUCENE_31).setAnalyzer(a)
+ .setMaxFieldLength(mfl.getLimit()));
}
/**
@@ -910,10 +945,13 @@
* @throws IOException if the directory cannot be
* read/written to or if there is any other low-level
* IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
*/
public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(d, a, deletionPolicy, mfl.getLimit(), null, null);
+ this(d, new IndexWriterConfig(Version.LUCENE_31).setAnalyzer(a)
+ .setMaxFieldLength(mfl.getLimit()).setIndexDeletionPolicy(
+ deletionPolicy));
}
/**
@@ -939,46 +977,16 @@
* if it does not exist and create is
* false or if there is any other low-level
* IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
*/
public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(d, a, create, deletionPolicy, mfl.getLimit(), null, null);
+ this(d, new IndexWriterConfig(Version.LUCENE_31).setAnalyzer(a).setOpenMode(
+ create ? OpenMode.CREATE : OpenMode.APPEND).setMaxFieldLength(
+ mfl.getLimit()).setIndexDeletionPolicy(deletionPolicy));
}
/**
- * Expert: constructs an IndexWriter with a custom {@link
- * IndexDeletionPolicy} and {@link IndexingChain},
- * for the index in d.
- * Text will be analyzed with a. If
- * create is true, then a new, empty index
- * will be created in d, replacing the index
- * already there, if any.
- *
- * @param d the index directory
- * @param a the analyzer to use
- * @param create true to create the index or overwrite
- * the existing one; false to append to the existing
- * index
- * @param deletionPolicy see above
- * @param mfl whether or not to limit field lengths, value is in number of terms/tokens. See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
- * @param indexingChain the {@link DocConsumer} chain to be used to
- * process documents
- * @param commit which commit to open
- * @throws CorruptIndexException if the index is corrupt
- * @throws LockObtainFailedException if another writer
- * has this index open (write.lock could not
- * be obtained)
- * @throws IOException if the directory cannot be read/written to, or
- * if it does not exist and create is
- * false or if there is any other low-level
- * IO error
- */
- IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit)
- throws CorruptIndexException, LockObtainFailedException, IOException {
- init(d, a, create, deletionPolicy, mfl.getLimit(), indexingChain, commit);
- }
-
- /**
* Expert: constructs an IndexWriter on specific commit
* point, with a custom {@link IndexDeletionPolicy}, for
* the index in d. Text will be analyzed
@@ -1009,44 +1017,70 @@
* if it does not exist and create is
* false or if there is any other low-level
* IO error
+ * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
*/
public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(d, a, false, deletionPolicy, mfl.getLimit(), null, commit);
+ this(d, new IndexWriterConfig(Version.LUCENE_31).setAnalyzer(a)
+ .setOpenMode(OpenMode.APPEND).setMaxFieldLength(mfl.getLimit())
+ .setIndexDeletionPolicy(deletionPolicy).setIndexCommit(commit));
}
- private void init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy,
- int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
- throws CorruptIndexException, LockObtainFailedException, IOException {
- if (IndexReader.indexExists(d)) {
- init(d, a, false, deletionPolicy, maxFieldLength, indexingChain, commit);
- } else {
- init(d, a, true, deletionPolicy, maxFieldLength, indexingChain, commit);
- }
- }
-
- private void init(Directory d, Analyzer a, final boolean create,
- IndexDeletionPolicy deletionPolicy, int maxFieldLength,
- IndexingChain indexingChain, IndexCommit commit)
- throws CorruptIndexException, LockObtainFailedException, IOException {
-
+ /**
+ * Constructs a new IndexWriter per the settings given in conf.
+ * Note that the passed-in {@link IndexWriterConfig} is cloned, so making
+ * changes to it after IndexWriter has been instantiated will not affect the
+ * writer. Additionally, calling {@link #getConfig()} and changing the
+ * parameters does not affect that IndexWriter instance.
+ *
+ * @param d
+ * the index directory. The index is either created or appended
+ * according to conf.getOpenMode().
+ * @param conf
+ * the configuration settings according to which IndexWriter should
+ * be initialized.
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws LockObtainFailedException
+ * if another writer has this index open (write.lock
+ * could not be obtained)
+ * @throws IOException
+ * if the directory cannot be read/written to, or if it does not
+ * exist and conf.getOpenMode() is
+ * OpenMode.APPEND or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, IndexWriterConfig conf)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ config = (IndexWriterConfig) conf.clone();
directory = d;
- analyzer = a;
+ analyzer = conf.getAnalyzer();
setMessageID(defaultInfoStream);
- this.maxFieldLength = maxFieldLength;
+ maxFieldLength = conf.getMaxFieldLength();
+ termIndexInterval = conf.getTermIndexInterval();
+ writeLockTimeout = conf.getWriteLockTimeout();
+ similarity = conf.getSimilarity();
+ mergeScheduler = conf.getMergeScheduler();
- if (indexingChain == null)
- indexingChain = DocumentsWriter.DefaultIndexingChain;
+ OpenMode mode = conf.getOpenMode();
+ boolean create;
+ if (mode == OpenMode.CREATE) {
+ create = true;
+ } else if (mode == OpenMode.APPEND) {
+ create = false;
+ } else {
+ // CREATE_OR_APPEND - create only if an index does not exist
+ create = !IndexReader.indexExists(directory);
+ }
if (create) {
// Clear the write lock in case it's leftover:
directory.clearLock(WRITE_LOCK_NAME);
}
- Lock writeLock = directory.makeLock(WRITE_LOCK_NAME);
+ writeLock = directory.makeLock(WRITE_LOCK_NAME);
if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
throw new LockObtainFailedException("Index locked for write: " + writeLock);
- this.writeLock = writeLock; // save it
try {
if (create) {
@@ -1077,6 +1111,7 @@
} else {
segmentInfos.read(directory);
+ IndexCommit commit = conf.getIndexCommit();
if (commit != null) {
// Swap out all segments, but, keep metadata in
// SegmentInfos, like version & generation, to
@@ -1100,14 +1135,14 @@
setRollbackSegmentInfos(segmentInfos);
- docWriter = new DocumentsWriter(directory, this, indexingChain);
+ docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain());
docWriter.setInfoStream(infoStream);
docWriter.setMaxFieldLength(maxFieldLength);
// Default deleter (for backwards compatibility) is
// KeepOnlyLastCommitDeleter:
deleter = new IndexFileDeleter(directory,
- deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
+ conf.getIndexDeletionPolicy(),
segmentInfos, infoStream, docWriter);
if (deleter.startingCommitDeleted)
@@ -1117,20 +1152,22 @@
// segments_N file.
changeCount++;
+ docWriter.setMaxBufferedDeleteTerms(conf.getMaxBufferedDeleteTerms());
+ docWriter.setRAMBufferSizeMB(conf.getRAMBufferSizeMB());
+ docWriter.setMaxBufferedDocs(conf.getMaxBufferedDocs());
pushMaxBufferedDocs();
if (infoStream != null) {
- message("init: create=" + create);
messageState();
}
} catch (IOException e) {
- this.writeLock.release();
- this.writeLock = null;
+ writeLock.release();
+ writeLock = null;
throw e;
}
}
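// A sketch of the cloning semantics described above (assumes an existing
// Directory dir):
//
//   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31).setMaxBufferedDocs(1000);
//   IndexWriter writer = new IndexWriter(dir, conf);
//   conf.setMaxBufferedDocs(100); // no effect: the writer cloned conf
//   assert writer.getConfig().getMaxBufferedDocs() == 1000;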
-
+
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
assert !rollbackSegmentInfos.hasExternalSegments(directory);
@@ -1141,6 +1178,20 @@
}
/**
+ * Returns the {@link IndexWriterConfig} that was passed to
+ * {@link #IndexWriter(Directory, IndexWriterConfig)}. This allows querying
+ * IndexWriter's settings.
+ *
+ * NOTE: setting any parameter on the returned instance has no effect
+ * on the IndexWriter instance. If you need to change those settings after
+ * IndexWriter has been created, you need to instantiate a new IndexWriter.
+ */
+ public IndexWriterConfig getConfig() {
+ ensureOpen();
+ return config;
+ }
+
+ /**
* Expert: set the merge policy used by this writer.
*/
public void setMergePolicy(MergePolicy mp) {
@@ -1167,6 +1218,7 @@
/**
* Expert: set the merge scheduler used by this writer.
+ * @deprecated use {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)} instead
*/
synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
ensureOpen();
@@ -1180,12 +1232,16 @@
this.mergeScheduler = mergeScheduler;
if (infoStream != null)
message("setMergeScheduler " + mergeScheduler);
+ // Required so config.getMergeScheduler returns the right value. But this will
+ // go away together with the method in 4.0.
+ config.setMergeScheduler(mergeScheduler);
}
/**
- * Expert: returns the current MergePolicy in use by this
+ * Expert: returns the current MergeScheduler in use by this
* writer.
- * @see #setMergePolicy
+ * @see #setMergeScheduler(MergeScheduler)
+ * @deprecated use {@link IndexWriterConfig#getMergeScheduler()} instead
*/
public MergeScheduler getMergeScheduler() {
ensureOpen();
@@ -1211,6 +1267,7 @@
* LogByteSizeMergePolicy}) also allows you to set this
* limit by net size (in MB) of the segment, using {@link
* LogByteSizeMergePolicy#setMaxMergeMB}.
+ * @deprecated use {@link LogMergePolicy#setMaxMergeDocs(int)} directly.
*/
public void setMaxMergeDocs(int maxMergeDocs) {
getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
@@ -1226,6 +1283,7 @@
* Otherwise an IllegalArgumentException is thrown.
*
* @see #setMaxMergeDocs
+ * @deprecated use {@link LogMergePolicy#getMaxMergeDocs()} directly.
*/
public int getMaxMergeDocs() {
return getLogMergePolicy().getMaxMergeDocs();
@@ -1244,6 +1302,7 @@
* is your memory, but you should anticipate an OutOfMemoryError.
* By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms
* will be indexed for a field.
+ * @deprecated use {@link IndexWriterConfig#setMaxFieldLength(int)} instead
*/
public void setMaxFieldLength(int maxFieldLength) {
ensureOpen();
@@ -1251,12 +1310,16 @@
docWriter.setMaxFieldLength(maxFieldLength);
if (infoStream != null)
message("setMaxFieldLength " + maxFieldLength);
+ // Required so config.getMaxFieldLength returns the right value. But this will
+ // go away together with the method in 4.0.
+ config.setMaxFieldLength(maxFieldLength);
}
/**
* Returns the maximum number of terms that will be
* indexed for a single field in a document.
* @see #setMaxFieldLength
+ * @deprecated use {@link IndexWriterConfig#getMaxFieldLength()} instead
*/
public int getMaxFieldLength() {
ensureOpen();
@@ -1281,6 +1344,7 @@
* enabled but smaller than 2, or it disables maxBufferedDocs
* when ramBufferSize is already disabled
* @see #setRAMBufferSizeMB
+ * @deprecated use {@link IndexWriterConfig#setMaxBufferedDocs(int)} instead.
*/
public void setMaxBufferedDocs(int maxBufferedDocs) {
ensureOpen();
@@ -1295,6 +1359,9 @@
pushMaxBufferedDocs();
if (infoStream != null)
message("setMaxBufferedDocs " + maxBufferedDocs);
+ // Required so config.getMaxBufferedDocs returns the right value. But this will
+ // go away together with the method in 4.0.
+ config.setMaxBufferedDocs(maxBufferedDocs);
}
/**
@@ -1321,6 +1388,7 @@
* Returns the number of buffered added documents that will
* trigger a flush if enabled.
* @see #setMaxBufferedDocs
+ * @deprecated use {@link IndexWriterConfig#getMaxBufferedDocs()} instead.
*/
public int getMaxBufferedDocs() {
ensureOpen();
@@ -1364,6 +1432,7 @@
* @throws IllegalArgumentException if ramBufferSize is
* enabled but non-positive, or it disables ramBufferSize
* when maxBufferedDocs is already disabled
+ * @deprecated use {@link IndexWriterConfig#setRAMBufferSizeMB(double)} instead.
*/
public void setRAMBufferSizeMB(double mb) {
if (mb > 2048.0) {
@@ -1378,10 +1447,14 @@
docWriter.setRAMBufferSizeMB(mb);
if (infoStream != null)
message("setRAMBufferSizeMB " + mb);
+ // Required so config.getRAMBufferSizeMB returns the right value. But this will
+ // go away together with the method in 4.0.
+ config.setRAMBufferSizeMB(mb);
}
/**
* Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
+ * @deprecated use {@link IndexWriterConfig#getRAMBufferSizeMB()} instead.
*/
public double getRAMBufferSizeMB() {
return docWriter.getRAMBufferSizeMB();
@@ -1398,6 +1471,7 @@
* @throws IllegalArgumentException if maxBufferedDeleteTerms
* is enabled but smaller than 1
* @see #setRAMBufferSizeMB
+ * @deprecated use {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)} instead.
*/
public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
ensureOpen();
@@ -1408,12 +1482,16 @@
docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
if (infoStream != null)
message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
+ // Required so config.getMaxBufferedDeleteTerms returns the right value. But this will
+ // go away together with the method in 4.0.
+ config.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
}
/**
* Returns the number of buffered deleted terms that will
* trigger a flush if enabled.
* @see #setMaxBufferedDeleteTerms
+ * @deprecated use {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} instead
*/
public int getMaxBufferedDeleteTerms() {
ensureOpen();
@@ -1434,6 +1512,7 @@
* Otherwise an IllegalArgumentException is thrown.
*
* This must never be less than 2. The default value is 10.
+ * @deprecated use {@link LogMergePolicy#setMergeFactor(int)} directly.
*/
public void setMergeFactor(int mergeFactor) {
getLogMergePolicy().setMergeFactor(mergeFactor);
@@ -1450,6 +1529,7 @@
* Otherwise an IllegalArgumentException is thrown.
*
* @see #setMergeFactor
+ * @deprecated use {@link LogMergePolicy#getMergeFactor()} directly.
*/
public int getMergeFactor() {
return getLogMergePolicy().getMergeFactor();
@@ -1486,15 +1566,11 @@
}
private void messageState() {
- message("setInfoStream: dir=" + directory +
+ message("dir=" + directory +
" mergePolicy=" + mergePolicy +
- " mergeScheduler=" + mergeScheduler +
- " ramBufferSizeMB=" + docWriter.getRAMBufferSizeMB() +
- " maxBufferedDocs=" + docWriter.getMaxBufferedDocs() +
- " maxBuffereDeleteTerms=" + docWriter.getMaxBufferedDeleteTerms() +
- " maxFieldLength=" + maxFieldLength +
" index=" + segString() +
- " version=" + Constants.LUCENE_VERSION);
+ " version=" + Constants.LUCENE_VERSION +
+ " config=" + config.toString());
}
/**
@@ -1514,15 +1590,20 @@
/**
* Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
* @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
+ * @deprecated use {@link IndexWriterConfig#setWriteLockTimeout(long)} instead
*/
public void setWriteLockTimeout(long writeLockTimeout) {
ensureOpen();
this.writeLockTimeout = writeLockTimeout;
+ // Required so config.getWriteLockTimeout returns the right value. But this will
+ // go away together with the method in 4.0.
+ config.setWriteLockTimeout(writeLockTimeout);
}
/**
* Returns allowed timeout when acquiring the write lock.
* @see #setWriteLockTimeout
+ * @deprecated use {@link IndexWriterConfig#getWriteLockTimeout()}
*/
public long getWriteLockTimeout() {
ensureOpen();
@@ -1532,18 +1613,20 @@
/**
* Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
* milliseconds).
+ * @deprecated use {@link IndexWriterConfig#setDefaultWriteLockTimeout(long)} instead
*/
public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
- IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
+ IndexWriterConfig.setDefaultWriteLockTimeout(writeLockTimeout);
}
/**
* Returns default write lock timeout for newly
* instantiated IndexWriters.
* @see #setDefaultWriteLockTimeout
+ * @deprecated use {@link IndexWriterConfig#getDefaultWriteLockTimeout()} instead
*/
public static long getDefaultWriteLockTimeout() {
- return IndexWriter.WRITE_LOCK_TIMEOUT;
+ return IndexWriterConfig.getDefaultWriteLockTimeout();
}
/**
@@ -4777,9 +4860,13 @@
}
/**
- * Specifies maximum field length (in number of tokens/terms) in {@link IndexWriter} constructors.
- * {@link #setMaxFieldLength(int)} overrides the value set by
- * the constructor.
+ * Specifies maximum field length (in number of tokens/terms) in
+ * {@link IndexWriter} constructors. {@link #setMaxFieldLength(int)} overrides
+ * the value set by the constructor.
+ *
+ * @deprecated use {@link IndexWriterConfig} and pass
+ * {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH} or your own
+ * value.
*/
public static final class MaxFieldLength {
Index: src/java/org/apache/lucene/index/IndexWriterConfig.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 0)
+++ src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 0)
@@ -0,0 +1,512 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.util.Version;
+
+/**
+ * Holds all the configuration of {@link IndexWriter}. This object is only used
+ * while constructing a new IndexWriter. Those settings cannot be changed
+ * afterwards, except by instantiating a new IndexWriter.
+ *
+ * All setter methods return {@link IndexWriterConfig} to allow chaining
+ * settings conveniently. Thus someone can do:
+ *
+ * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31);
+ * conf.setter1().setter2();
+ */
+public final class IndexWriterConfig implements Cloneable {
+
+ public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE;
+
+ /**
+ * Specifies the open mode for {@link IndexWriter}: {@link OpenMode#CREATE}
+ * creates a new index or overwrites an existing one,
+ * {@link OpenMode#APPEND} opens an existing index, and
+ * {@link OpenMode#CREATE_OR_APPEND} creates a new index only if one does not
+ * exist, otherwise it opens the existing one.
+ */
+ public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
+
+ /** Value to denote a flush trigger is disabled. */
+ public final static int DISABLE_AUTO_FLUSH = -1;
+
+ /** Disabled by default (because IndexWriter flushes by RAM usage by default). */
+ public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
+
+ /** Disabled by default (because IndexWriter flushes by RAM usage by default). */
+ public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
+
+ /** Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). */
+ public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
+
+ /** Default value for the write lock timeout (1,000 ms). */
+ public static long WRITE_LOCK_TIMEOUT = 1000;
+
+ /** Default value is 128. */
+ public static final int DEFAULT_TERM_INDEX_INTERVAL = 128;
+
+ /** Sets the default (for any instance) maximum time to wait for a write lock (in milliseconds). */
+ public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
+ WRITE_LOCK_TIMEOUT = writeLockTimeout;
+ }
+
+ /** Returns the default write lock timeout for newly instantiated configs. */
+ public static long getDefaultWriteLockTimeout() {
+ return WRITE_LOCK_TIMEOUT;
+ }
+
+ private Analyzer analyzer;
+ private IndexDeletionPolicy delPolicy;
+ private IndexCommit commit;
+ private OpenMode openMode;
+ private Similarity similarity;
+ private int termIndexInterval;
+ private MergeScheduler mergeScheduler;
+ private long writeLockTimeout;
+ private int maxBufferedDeleteTerms;
+ private double ramBufferSizeMB;
+ private int maxBufferedDocs;
+ private IndexingChain indexingChain;
+ private int maxFieldLength;
+
+ private Version matchVersion;
+
+ /** Creates a new config whose defaults match the given {@link Version}. */
+ public IndexWriterConfig(Version matchVersion) {
+ this.matchVersion = matchVersion;
+ analyzer = new WhitespaceAnalyzer(matchVersion);
+ delPolicy = new KeepOnlyLastCommitDeletionPolicy();
+ commit = null;
+ openMode = OpenMode.CREATE_OR_APPEND;
+ similarity = Similarity.getDefault();
+ termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+ mergeScheduler = new ConcurrentMergeScheduler();
+ writeLockTimeout = WRITE_LOCK_TIMEOUT;
+ maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
+ ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB;
+ maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
+ indexingChain = DocumentsWriter.defaultIndexingChain;
+ maxFieldLength = UNLIMITED_FIELD_LENGTH;
+ }
+
+ @Override
+ public Object clone() {
+ try {
+ return super.clone();
+ } catch (CloneNotSupportedException e) {
+ // should not happen
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Sets the default analyzer to use for indexing documents.
+ *
+ * NOTE: the analyzer cannot be null. If null is passed,
+ * the analyzer will be set to the default.
+ */
+ public IndexWriterConfig setAnalyzer(Analyzer analyzer) {
+ this.analyzer = analyzer == null ? new WhitespaceAnalyzer(matchVersion) : analyzer;
+ return this;
+ }
+
+ /** Returns the default analyzer to use for indexing documents. */
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ /** Specifies {@link OpenMode} of that index. */
+ public IndexWriterConfig setOpenMode(OpenMode openMode) {
+ this.openMode = openMode;
+ return this;
+ }
+
+ /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */
+ public OpenMode getOpenMode() {
+ return openMode;
+ }
+
+ /**
+ * Expert: allows an optional {@link IndexDeletionPolicy} implementation to be
+ * specified. You can use this to control when prior commits are deleted from
+ * the index. The default policy is {@link KeepOnlyLastCommitDeletionPolicy}
+ * which removes all prior commits as soon as a new commit is done (this
+ * matches behavior before 2.2). Creating your own policy can allow you to
+ * explicitly keep previous "point in time" commits alive in the index for
+ * some time, to allow readers to refresh to the new commit without having the
+ * old commit deleted out from under them. This is necessary on filesystems
+ * like NFS that do not support "delete on last close" semantics, which
+ * Lucene's "point in time" search normally relies on.
+ *
+ * NOTE: the deletion policy cannot be null. If null is
+ * passed, the deletion policy will be set to the default.
+ */
+ public IndexWriterConfig setIndexDeletionPolicy(IndexDeletionPolicy delPolicy) {
+ this.delPolicy = delPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : delPolicy;
+ return this;
+ }
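+
+ // Example: a policy that keeps every commit (a sketch; assumes the
+ // List-based onInit/onCommit callbacks of IndexDeletionPolicy):
+ //
+ //   conf.setIndexDeletionPolicy(new IndexDeletionPolicy() {
+ //     public void onInit(List<? extends IndexCommit> commits) {}   // keep all
+ //     public void onCommit(List<? extends IndexCommit> commits) {} // keep all
+ //   });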
+
+ /**
+ * Returns the {@link IndexDeletionPolicy} specified in
+ * {@link #setIndexDeletionPolicy(IndexDeletionPolicy)} or the default
+ * {@link KeepOnlyLastCommitDeletionPolicy}.
+ */
+ public IndexDeletionPolicy getIndexDeletionPolicy() {
+ return delPolicy;
+ }
+
+ /**
+ * The maximum number of terms that will be indexed for a single field in a
+ * document. This limits the amount of memory required for indexing, so that
+ * collections with very large files will not crash the indexing process by
+ * running out of memory. This setting refers to the number of running terms,
+ * not to the number of different terms.
+ *
+ * NOTE: this silently truncates large documents, excluding
+ * from the index all terms that occur further in the document. If you know
+ * your source documents are large, be sure to set this value high enough to
+ * accommodate the expected size. If you set it to
+ * {@link #UNLIMITED_FIELD_LENGTH}, then the only limit is your memory, but
+ * you should anticipate an OutOfMemoryError.
+ *
+ * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}.
+ */
+ public IndexWriterConfig setMaxFieldLength(int maxFieldLength) {
+ this.maxFieldLength = maxFieldLength;
+ return this;
+ }
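+
+ // Example (values assumed): with setMaxFieldLength(10000), a 50,000-term
+ // field is silently truncated to its first 10,000 terms; the remaining
+ // terms never reach the index.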
+
+ /**
+ * Returns the maximum number of terms that will be indexed for a single field
+ * in a document.
+ *
+ * @see #setMaxFieldLength(int)
+ */
+ public int getMaxFieldLength() {
+ return maxFieldLength;
+ }
+
+ /**
+ * Expert: allows opening a certain commit point. The default is null, which
+ * opens the latest commit point.
+ */
+ public IndexWriterConfig setIndexCommit(IndexCommit commit) {
+ this.commit = commit;
+ return this;
+ }
+
+ /**
+ * Returns the {@link IndexCommit} as specified in
+ * {@link #setIndexCommit(IndexCommit)} or the default, null
+ * which specifies to open the latest index commit point.
+ */
+ public IndexCommit getIndexCommit() {
+ return commit;
+ }
+
+ /**
+ * Expert: set the {@link Similarity} implementation used by this IndexWriter.
+ *
+ * NOTE: the similarity cannot be null. If null is passed,
+ * the similarity will be set to the default.
+ *
+ * @see Similarity#setDefault(Similarity)
+ */
+ public IndexWriterConfig setSimilarity(Similarity similarity) {
+ this.similarity = similarity == null ? Similarity.getDefault() : similarity;
+ return this;
+ }
+
+ /**
+ * Expert: returns the {@link Similarity} implementation used by this
+ * IndexWriter. This defaults to the current value of
+ * {@link Similarity#getDefault()}.
+ */
+ public Similarity getSimilarity() {
+ return similarity;
+ }
+
+ /**
+ * Expert: set the interval between indexed terms. Large values cause less
+ * memory to be used by IndexReader, but slow random-access to terms. Small
+ * values cause more memory to be used by an IndexReader, and speed
+ * random-access to terms.
+ *
+ * This parameter determines the amount of computation required per query
+ * term, regardless of the number of documents that contain that term. In
+ * particular, it is the maximum number of other terms that must be scanned
+ * before a term is located and its frequency and position information may be
+ * processed. In a large index with user-entered query terms, query processing
+ * time is likely to be dominated not by term lookup but rather by the
+ * processing of frequency and positional data. In a small index or when many
+ * uncommon query terms are generated (e.g., by wildcard queries) term lookup
+ * may become a dominant cost.
+ *
+ * In particular, numUniqueTerms/interval terms are read into
+ * memory by an IndexReader, and, on average, interval/2 terms
+ * must be scanned for each random term access.
+ *
+ * @see #DEFAULT_TERM_INDEX_INTERVAL
+ */
+ public IndexWriterConfig setTermIndexInterval(int interval) {
+ this.termIndexInterval = interval;
+ return this;
+ }
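+
+ // Illustrative arithmetic for the formula above: with 1,000,000 unique
+ // terms and the default interval of 128, an IndexReader holds about
+ // 1,000,000/128 ~= 7,813 index terms in memory, and a random term access
+ // scans 128/2 = 64 terms on average.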
+
+ /**
+ * Returns the interval between indexed terms.
+ *
+ * @see #setTermIndexInterval(int)
+ */
+ public int getTermIndexInterval() {
+ return termIndexInterval;
+ }
+
+ /**
+ * Expert: sets the merge scheduler used by this writer. The default is
+ * {@link ConcurrentMergeScheduler}.
+ *
+ * NOTE: the merge scheduler cannot be null. If null is
+ * passed, the merge scheduler will be set to the default.
+ */
+ public IndexWriterConfig setMergeScheduler(MergeScheduler mergeScheduler) {
+ this.mergeScheduler = mergeScheduler == null ? new ConcurrentMergeScheduler() : mergeScheduler;
+ return this;
+ }
+
+ /**
+ * Returns the {@link MergeScheduler} that was set by
+ * {@link #setMergeScheduler(MergeScheduler)}
+ */
+ public MergeScheduler getMergeScheduler() {
+ return mergeScheduler;
+ }
+
+ /**
+ * Sets the maximum time to wait for a write lock (in milliseconds) for this
+ * instance. You can change the default value for all instances by calling
+ * {@link #setDefaultWriteLockTimeout(long)}.
+ */
+ public IndexWriterConfig setWriteLockTimeout(long writeLockTimeout) {
+ this.writeLockTimeout = writeLockTimeout;
+ return this;
+ }
+
+ /**
+ * Returns allowed timeout when acquiring the write lock.
+ *
+ * @see #setWriteLockTimeout(long)
+ */
+ public long getWriteLockTimeout() {
+ return writeLockTimeout;
+ }
+
+ /**
+ * Determines the minimal number of delete terms required before the buffered
+ * in-memory delete terms are applied and flushed. If there are documents
+ * buffered in memory at the time, they are merged and a new segment is
+ * created.
+
+ *
+ * Disabled by default (writer flushes by RAM usage).
+ *
+ * @throws IllegalArgumentException if maxBufferedDeleteTerms
+ * is enabled but smaller than 1
+ * @see #setRAMBufferSizeMB
+ */
+ public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
+ if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
+ && maxBufferedDeleteTerms < 1)
+ throw new IllegalArgumentException(
+ "maxBufferedDeleteTerms must at least be 1 when enabled");
+ this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
+ return this;
+ }
+
+ /**
+ * Returns the number of buffered deleted terms that will trigger a flush if
+ * enabled.
+ *
+ * @see #setMaxBufferedDeleteTerms(int)
+ */
+ public int getMaxBufferedDeleteTerms() {
+ return maxBufferedDeleteTerms;
+ }
+
+ /**
+ * Determines the amount of RAM that may be used for buffering added documents
+ * and deletions before they are flushed to the Directory. Generally for
+ * faster indexing performance it's best to flush by RAM usage instead of
+ * document count and use as large a RAM buffer as you can.
+ *
+ * When this is set, the writer will flush whenever buffered documents and
+ * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
+ * triggering a flush due to RAM usage. Note that if flushing by document
+ * count is also enabled, then the flush will be triggered by whichever comes
+ * first.
+ *
+ * NOTE: the account of RAM usage for pending deletions is only
+ * approximate. Specifically, if you delete by Query, Lucene currently has no
+ * way to measure the RAM usage of individual Queries so the accounting will
+ * under-estimate and you should compensate by either calling commit()
+ * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
+ * to flush by count instead of RAM usage (each buffered delete Query counts
+ * as one).
+ *
+ * NOTE: because IndexWriter uses ints when managing its
+ * internal storage, the absolute maximum value for this setting is somewhat
+ * less than 2048 MB. The precise limit depends on various factors, such as
+ * how large your documents are, how many fields have norms, etc., so it's
+ * best to set this value comfortably under 2048.
+ *
+ *
+ * The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
+ *
+ * @throws IllegalArgumentException
+ * if ramBufferSize is enabled but non-positive, or it disables
+ * ramBufferSize when maxBufferedDocs is already disabled
+ */
+ public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
+ if (ramBufferSizeMB > 2048.0) {
+ throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB
+ + " is too large; should be comfortably less than 2048");
+ }
+ if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0)
+ throw new IllegalArgumentException(
+ "ramBufferSize should be > 0.0 MB when enabled");
+ if (ramBufferSizeMB == DISABLE_AUTO_FLUSH && maxBufferedDocs == DISABLE_AUTO_FLUSH)
+ throw new IllegalArgumentException(
+ "at least one of ramBufferSize and maxBufferedDocs must be enabled");
+ this.ramBufferSizeMB = ramBufferSizeMB;
+ return this;
+ }
+
+ /** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */
+ public double getRAMBufferSizeMB() {
+ return ramBufferSizeMB;
+ }
+
+ /**
+ * Determines the minimal number of documents required before the buffered
+ * in-memory documents are flushed as a new Segment. Large values generally
+ * give faster indexing.
+ *
+ * When this is set, the writer will flush every maxBufferedDocs added
+ * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a
+ * flush due to number of buffered documents. Note that if flushing by RAM
+ * usage is also enabled, then the flush will be triggered by whichever comes
+ * first.
+ *
+ * Disabled by default (writer flushes by RAM usage).
+ *
+ * @see #setRAMBufferSizeMB(double)
+ *
+ * @throws IllegalArgumentException
+ * if maxBufferedDocs is enabled but smaller than 2, or it disables
+ * maxBufferedDocs when ramBufferSize is already disabled
+ */
+ public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
+ if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
+ throw new IllegalArgumentException(
+ "maxBufferedDocs must at least be 2 when enabled");
+ if (maxBufferedDocs == DISABLE_AUTO_FLUSH
+ && ramBufferSizeMB == DISABLE_AUTO_FLUSH)
+ throw new IllegalArgumentException(
+ "at least one of ramBufferSize and maxBufferedDocs must be enabled");
+ this.maxBufferedDocs = maxBufferedDocs;
+ return this;
+ }
+
+ /**
+ * Returns the number of buffered added documents that will trigger a flush if
+ * enabled.
+ *
+ * @see #setMaxBufferedDocs(int)
+ */
+ public int getMaxBufferedDocs() {
+ return maxBufferedDocs;
+ }
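+
+ // Example (values assumed): combine both flush triggers; the writer then
+ // flushes at 32 MB of buffered RAM or 10,000 buffered docs, whichever
+ // comes first:
+ //
+ //   conf.setRAMBufferSizeMB(32.0).setMaxBufferedDocs(10000);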
+
+ /** Expert: sets the {@link DocConsumer} chain to be used to process documents. */
+ IndexWriterConfig setIndexingChain(IndexingChain indexingChain) {
+ this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain;
+ return this;
+ }
+
+ /** Returns the indexing chain set by {@link #setIndexingChain(IndexingChain)}. */
+ IndexingChain getIndexingChain() {
+ return indexingChain;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("matchVersion=").append(matchVersion).append("\n");
+ sb.append("analyzer=").append(analyzer.getClass().getName()).append("\n");
+ sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
+ sb.append("commit=").append(commit == null ? "null" : commit.getClass().getName()).append("\n");
+ sb.append("openMode=").append(openMode).append("\n");
+ sb.append("maxFieldLength=").append(maxFieldLength).append("\n");
+ sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
+ sb.append("termIndexInterval=").append(termIndexInterval).append("\n");
+ sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
+ sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");
+ sb.append("writeLockTimeout=").append(writeLockTimeout).append("\n");
+ sb.append("maxBufferedDeleteTerms=").append(maxBufferedDeleteTerms).append("\n");
+ sb.append("ramBufferSizeMB=").append(ramBufferSizeMB).append("\n");
+ sb.append("maxBufferedDocs=").append(maxBufferedDocs).append("\n");
+ return sb.toString();
+ }
+}
Property changes on: src\java\org\apache\lucene\index\IndexWriterConfig.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: src/test/org/apache/lucene/index/TestIndexWriterConfig.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriterConfig.java (revision 0)
+++ src/test/org/apache/lucene/index/TestIndexWriterConfig.java (revision 0)
@@ -0,0 +1,263 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.junit.Assert.*;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCaseJ4;
+import org.junit.Test;
+
+public class TestIndexWriterConfig extends LuceneTestCaseJ4 {
+
+ private static final class MySimilarity extends DefaultSimilarity {
+ // Does not implement anything - used only for type checking on IndexWriterConfig.
+ }
+
+ private static final class MyIndexingChain extends IndexingChain {
+ // Does not implement anything - used only for type checking on IndexWriterConfig.
+
+ @Override
+ DocConsumer getChain(DocumentsWriter documentsWriter) {
+ return null;
+ }
+
+ }
+
+ @Test
+ public void testDefaults() throws Exception {
+ IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT);
+ assertEquals(WhitespaceAnalyzer.class, conf.getAnalyzer().getClass());
+ assertNull(conf.getIndexCommit());
+ assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
+ assertEquals(IndexWriterConfig.UNLIMITED_FIELD_LENGTH, conf.getMaxFieldLength());
+ assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
+ assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
+ assertTrue(Similarity.getDefault() == conf.getSimilarity());
+ assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval());
+ assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout());
+ assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout());
+ assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.getMaxBufferedDeleteTerms());
+ assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.getRAMBufferSizeMB(), 0.0);
+ assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.getMaxBufferedDocs());
+ assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
+
+ // Sanity check - validate that all getters are covered.
+ Set