merge.factor (default 10),
- * max.buffered (default no flush), max.field.length (default
- * 10,000 tokens), max.field.length, compound (default true), ram.flush.mb [default 0],
+ * max.buffered (default no flush), compound (default true), ram.flush.mb (default 0),
* merge.policy (default org.apache.lucene.index.LogByteSizeMergePolicy),
* merge.scheduler (default
* org.apache.lucene.index.ConcurrentMergeScheduler),
@@ -153,7 +152,6 @@
logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
}
}
- iwConf.setMaxFieldLength(config.get("max.field.length",OpenIndexTask.DEFAULT_MAX_FIELD_LENGTH));
final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
final int maxBuffered = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED);
if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1060287)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -784,7 +784,7 @@
public void testHighFreqTerm() throws IOException {
MockDirectoryWrapper dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000000).setRAMBufferSizeMB(0.01));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01));
// Massive doc that has 128 K a's
StringBuilder b = new StringBuilder(1024*1024);
for(int i=0;i<4096;i++) {
@@ -1236,30 +1236,7 @@
writer.close();
dir.close();
}
-
- // LUCENE-1084: test user-specified field length
- public void testUserSpecifiedMaxFieldLength() throws IOException {
- Directory dir = newDirectory();
-
- IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000));
-
- Document doc = new Document();
- StringBuilder b = new StringBuilder();
- for(int i=0;i<10000;i++)
- b.append(" a");
- b.append(" x");
- doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
- writer.addDocument(doc);
- writer.close();
-
- IndexReader reader = IndexReader.open(dir, true);
- Term t = new Term("field", "x");
- assertEquals(1, reader.docFreq(t));
- reader.close();
- dir.close();
- }
-
+
// LUCENE-325: test expungeDeletes, when 2 singular merges
// are required
public void testExpungeDeletes() throws IOException {
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (revision 1060287)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (working copy)
@@ -17,7 +17,6 @@
* limitations under the License.
*/
-import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
@@ -26,7 +25,6 @@
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
-import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
@@ -49,22 +47,12 @@
}
- private static final class MyWarmer extends IndexReaderWarmer {
- // Does not implement anything - used only for type checking on IndexWriterConfig.
-
- @Override
- public void warm(IndexReader reader) throws IOException {
- }
-
- }
-
@Test
public void testDefaults() throws Exception {
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
assertEquals(MockAnalyzer.class, conf.getAnalyzer().getClass());
assertNull(conf.getIndexCommit());
assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
- assertEquals(IndexWriterConfig.UNLIMITED_FIELD_LENGTH, conf.getMaxFieldLength());
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
assertTrue(Similarity.getDefault() == conf.getSimilarity());
@@ -129,7 +117,6 @@
// Tests that the values of the constants does not change
assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT);
assertEquals(32, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL);
- assertEquals(Integer.MAX_VALUE, IndexWriterConfig.UNLIMITED_FIELD_LENGTH);
assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH);
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS);
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
Index: lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (revision 1060287)
+++ lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (working copy)
@@ -63,8 +63,6 @@
fieldState.reset(docState.doc.getBoost());
- final int maxFieldLength = docState.maxFieldLength;
-
final boolean doInvert = consumer.start(fields, count);
for(int i=0;i= maxFieldLength) {
- if (docState.infoStream != null)
- docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
- break;
- }
hasMoreTokens = stream.incrementToken();
}
Index: lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java (revision 1060287)
+++ lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java (working copy)
@@ -35,7 +35,6 @@
public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException {
this.docWriter = docWriter;
docState = new DocumentsWriter.DocState();
- docState.maxFieldLength = docWriter.maxFieldLength;
docState.infoStream = docWriter.infoStream;
docState.similarity = docWriter.similarity;
docState.docWriter = docWriter;
Index: lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 1060287)
+++ lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy)
@@ -127,7 +127,6 @@
private boolean aborting; // True if an abort is pending
PrintStream infoStream;
- int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
Similarity similarity;
// max # simultaneous threads; if there are more than
@@ -140,7 +139,6 @@
static class DocState {
DocumentsWriter docWriter;
Analyzer analyzer;
- int maxFieldLength;
PrintStream infoStream;
Similarity similarity;
int docID;
@@ -191,6 +189,7 @@
/**
* Allocate bytes used from shared pool.
*/
+ @Override
protected byte[] newBuffer(int size) {
assert size == PER_DOC_BLOCK_SIZE;
return perDocAllocator.getByteBlock();
@@ -358,13 +357,6 @@
}
}
- synchronized void setMaxFieldLength(int maxFieldLength) {
- this.maxFieldLength = maxFieldLength;
- for(int i=0;i
- * NOTE: by default, {@link IndexWriterConfig#getMaxFieldLength()}
- * returns {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH}. Pay attention to
- * whether this setting fits your application.
*
* @param d
* the index directory. The index is either created or appended
@@ -689,7 +686,6 @@
directory = d;
analyzer = conf.getAnalyzer();
infoStream = defaultInfoStream;
- maxFieldLength = conf.getMaxFieldLength();
termIndexInterval = conf.getTermIndexInterval();
mergePolicy = conf.getMergePolicy();
mergePolicy.setIndexWriter(this);
@@ -768,7 +764,6 @@
docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes);
docWriter.setInfoStream(infoStream);
- docWriter.setMaxFieldLength(maxFieldLength);
// Default deleter (for backwards compatibility) is
// KeepOnlyLastCommitDeleter:
@@ -987,6 +982,7 @@
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
+ @Override
public void close() throws CorruptIndexException, IOException {
close(true);
}
@@ -1177,26 +1173,8 @@
}
/**
- * The maximum number of terms that will be indexed for a single field in a
- * document. This limits the amount of memory required for indexing, so that
- * collections with very large files will not crash the indexing process by
- * running out of memory.
- * Note that this effectively truncates large documents, excluding from the
- * index terms that occur further in the document. If you know your source
- * documents are large, be sure to set this value high enough to accommodate
- * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
- * is your memory, but you should anticipate an OutOfMemoryError.
- * By default, no more than 10,000 terms will be indexed for a field.
+ * Adds a document to this index.
*
- * @see MaxFieldLength
- */
- private int maxFieldLength;
-
- /**
- * Adds a document to this index. If the document contains more than
- * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field,
- * the remainder are discarded.
- *
* Note that if an Exception is hit (for example disk full)
* then the index will be consistent, but this document
* may not have been added. Furthermore, it's possible
@@ -1242,9 +1220,7 @@
/**
* Adds a document to this index, using the provided analyzer instead of the
- * value of {@link #getAnalyzer()}. If the document contains more than
- * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are
- * discarded.
+ * value of {@link #getAnalyzer()}.
*
*
See {@link #addDocument(Document)} for details on
* index and IndexWriter state after an Exception, and
Index: lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 1060287)
+++ lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (working copy)
@@ -41,8 +41,6 @@
*/
public final class IndexWriterConfig implements Cloneable {
- public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE;
-
/**
* Specifies the open mode for {@link IndexWriter}:
*
@@ -55,7 +53,7 @@
public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
/** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
- public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
+ public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
/** Denotes a flush trigger is disabled. */
public final static int DISABLE_AUTO_FLUSH = -1;
@@ -113,7 +111,6 @@
private IndexDeletionPolicy delPolicy;
private IndexCommit commit;
private OpenMode openMode;
- private int maxFieldLength;
private Similarity similarity;
private int termIndexInterval; // TODO: this should be private to the codec, not settable here
private MergeScheduler mergeScheduler;
@@ -145,7 +142,6 @@
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
commit = null;
openMode = OpenMode.CREATE_OR_APPEND;
- maxFieldLength = UNLIMITED_FIELD_LENGTH;
similarity = Similarity.getDefault();
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
mergeScheduler = new ConcurrentMergeScheduler();
@@ -220,37 +216,6 @@
}
/**
- * The maximum number of terms that will be indexed for a single field in a
- * document. This limits the amount of memory required for indexing, so that
- * collections with very large files will not crash the indexing process by
- * running out of memory. This setting refers to the number of running terms,
- * not to the number of different terms.
- *
- * NOTE: this silently truncates large documents, excluding from the
- * index all terms that occur further in the document. If you know your source
- * documents are large, be sure to set this value high enough to accomodate
- * the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then
- * the only limit is your memory, but you should anticipate an
- * OutOfMemoryError.
- *
- * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}.
- */
- public IndexWriterConfig setMaxFieldLength(int maxFieldLength) {
- this.maxFieldLength = maxFieldLength;
- return this;
- }
-
- /**
- * Returns the maximum number of terms that will be indexed for a single field
- * in a document.
- *
- * @see #setMaxFieldLength(int)
- */
- public int getMaxFieldLength() {
- return maxFieldLength;
- }
-
- /**
* Expert: allows to open a certain commit point. The default is null which
* opens the latest commit point.
*/
@@ -611,7 +576,6 @@
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
sb.append("openMode=").append(openMode).append("\n");
- sb.append("maxFieldLength=").append(maxFieldLength).append("\n");
sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
Index: lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java
===================================================================
--- lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (revision 1060287)
+++ lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (working copy)
@@ -84,8 +84,7 @@
}
writer = new IndexWriter(FSDirectory.open(index), new IndexWriterConfig(
Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT))
- .setMaxFieldLength(1000000).setOpenMode(
- create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
+ .setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
indexDocs(root, index, create); // add new docs
System.out.println("Optimizing index...");