Index: solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java =================================================================== --- solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java (revision 1060287) +++ solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java (working copy) @@ -99,8 +99,7 @@ Directory dir = newFSDirectory(newDir); IndexWriter iw = new IndexWriter( dir, - new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)). - setMaxFieldLength(1000) + new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)) ); Document doc = new Document(); doc.add(new Field("id", "2", Field.Store.YES, Field.Index.ANALYZED)); Index: solr/src/test/org/apache/solr/search/TestSort.java =================================================================== --- solr/src/test/org/apache/solr/search/TestSort.java (revision 1060287) +++ solr/src/test/org/apache/solr/search/TestSort.java (working copy) @@ -63,8 +63,7 @@ IndexWriter iw = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)). - setOpenMode(IndexWriterConfig.OpenMode.CREATE). - setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH) + setOpenMode(IndexWriterConfig.OpenMode.CREATE) ); final MyDoc[] mydocs = new MyDoc[ndocs]; Index: solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java =================================================================== --- solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (revision 1060287) +++ solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (working copy) @@ -284,8 +284,7 @@ Directory dir = newFSDirectory(altIndexDir); IndexWriter iw = new IndexWriter( dir, - new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)). - setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH) + new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) ); for (int i = 0; i < ALT_DOCS.length; i++) { Document doc = new Document(); Index: solr/src/java/org/apache/solr/update/SolrIndexConfig.java =================================================================== --- solr/src/java/org/apache/solr/update/SolrIndexConfig.java (revision 1060287) +++ solr/src/java/org/apache/solr/update/SolrIndexConfig.java (working copy) @@ -53,7 +53,6 @@ maxMergeDocs = -1; mergeFactor = -1; ramBufferSizeMB = 16; - maxFieldLength = -1; writeLockTimeout = -1; commitLockTimeout = -1; lockType = null; @@ -71,7 +70,6 @@ public final double ramBufferSizeMB; - public final int maxFieldLength; public final int writeLockTimeout; public final int commitLockTimeout; public final String lockType; @@ -95,7 +93,6 @@ mergeFactor=solrConfig.getInt(prefix+"/mergeFactor",def.mergeFactor); ramBufferSizeMB = solrConfig.getDouble(prefix+"/ramBufferSizeMB", def.ramBufferSizeMB); - maxFieldLength=solrConfig.getInt(prefix+"/maxFieldLength",def.maxFieldLength); writeLockTimeout=solrConfig.getInt(prefix+"/writeLockTimeout", def.writeLockTimeout); commitLockTimeout=solrConfig.getInt(prefix+"/commitLockTimeout", def.commitLockTimeout); lockType=solrConfig.get(prefix+"/lockType", def.lockType); @@ -153,9 +150,6 @@ if (termIndexInterval != -1) iwc.setTermIndexInterval(termIndexInterval); - if (maxFieldLength != -1) - iwc.setMaxFieldLength(maxFieldLength); - if (writeLockTimeout != -1) iwc.setWriteLockTimeout(writeLockTimeout); Index: solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java =================================================================== --- solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (revision 1060287) +++ solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (working copy) @@ -74,7 +74,6 @@ return null; } - @SuppressWarnings("unchecked") private void loadExternalFileDictionary(SolrCore core) { try { @@ -92,7 +91,6 @@ new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer()). setMaxBufferedDocs(150). setMergePolicy(mp). - setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH). setOpenMode(IndexWriterConfig.OpenMode.CREATE) ); Index: modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java (revision 1060287) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java (working copy) @@ -22,8 +22,16 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase { @@ -39,4 +47,26 @@ assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3); } + public void testLimitTokenCountIndexWriter() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( + TEST_VERSION_CURRENT, new LimitTokenCountAnalyzer(new MockAnalyzer(), 100000))); + + Document doc = new Document(); + StringBuilder b = new StringBuilder(); + for(int i=0;i<10000;i++) + b.append(" a"); + b.append(" x"); + doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + Term t = new Term("field", "x"); + assertEquals(1, reader.docFreq(t)); + reader.close(); + dir.close(); + } + } \ No newline at end of file Index: modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java =================================================================== --- modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (revision 1060287) +++ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (working copy) @@ -26,7 +26,6 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import java.io.IOException; - /** * Open an index writer. *
Other side effects: index writer object in perfRunData is set. @@ -41,7 +40,6 @@ public class OpenIndexTask extends PerfTask { public static final int DEFAULT_MAX_BUFFERED = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS; - public static final int DEFAULT_MAX_FIELD_LENGTH = IndexWriterConfig.UNLIMITED_FIELD_LENGTH; public static final int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR; public static final double DEFAULT_RAM_FLUSH_MB = (int) IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB; private String commitUserData; Index: modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java =================================================================== --- modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 1060287) +++ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (working copy) @@ -46,8 +46,7 @@ * Create an index.
* Other side effects: index writer object in perfRunData is set.
* Relevant properties: merge.factor (default 10), - * max.buffered (default no flush), max.field.length (default - * 10,000 tokens), max.field.length, compound (default true), ram.flush.mb [default 0], + * max.buffered (default no flush), compound (default true), ram.flush.mb [default 0], * merge.policy (default org.apache.lucene.index.LogByteSizeMergePolicy), * merge.scheduler (default * org.apache.lucene.index.ConcurrentMergeScheduler), @@ -153,7 +152,6 @@ logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR)); } } - iwConf.setMaxFieldLength(config.get("max.field.length",OpenIndexTask.DEFAULT_MAX_FIELD_LENGTH)); final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB); final int maxBuffered = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED); if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH) { Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1060287) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -784,7 +784,7 @@ public void testHighFreqTerm() throws IOException { MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000000).setRAMBufferSizeMB(0.01)); + TEST_VERSION_CURRENT, new MockAnalyzer()).setRAMBufferSizeMB(0.01)); // Massive doc that has 128 K a's StringBuilder b = new StringBuilder(1024*1024); for(int i=0;i<4096;i++) { @@ -1236,30 +1236,7 @@ writer.close(); dir.close(); } - - // LUCENE-1084: test user-specified field length - public void testUserSpecifiedMaxFieldLength() throws IOException { - Directory dir = newDirectory(); - - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxFieldLength(100000)); - - Document doc = new Document(); - StringBuilder b = new StringBuilder(); - for(int i=0;i<10000;i++) - b.append(" a"); - b.append(" x"); - doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED)); - writer.addDocument(doc); - writer.close(); - - IndexReader reader = IndexReader.open(dir, true); - Term t = new Term("field", "x"); - assertEquals(1, reader.docFreq(t)); - reader.close(); - dir.close(); - } - + // LUCENE-325: test expungeDeletes, when 2 singular merges // are required public void testExpungeDeletes() throws IOException { Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (revision 1060287) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. */ -import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.lang.reflect.Modifier; @@ -26,7 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.index.DocumentsWriter.IndexingChain; -import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.Similarity; @@ -49,22 +47,12 @@ } - private static final class MyWarmer extends IndexReaderWarmer { - // Does not implement anything - used only for type checking on IndexWriterConfig. - - @Override - public void warm(IndexReader reader) throws IOException { - } - - } - @Test public void testDefaults() throws Exception { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); assertEquals(MockAnalyzer.class, conf.getAnalyzer().getClass()); assertNull(conf.getIndexCommit()); assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); - assertEquals(IndexWriterConfig.UNLIMITED_FIELD_LENGTH, conf.getMaxFieldLength()); assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode()); assertTrue(Similarity.getDefault() == conf.getSimilarity()); @@ -129,7 +117,6 @@ // Tests that the values of the constants does not change assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT); assertEquals(32, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL); - assertEquals(Integer.MAX_VALUE, IndexWriterConfig.UNLIMITED_FIELD_LENGTH); assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH); assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS); assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS); Index: lucene/src/java/org/apache/lucene/index/DocInverterPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (revision 1060287) +++ lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (working copy) @@ -63,8 +63,6 @@ fieldState.reset(docState.doc.getBoost()); - final int maxFieldLength = docState.maxFieldLength; - final boolean doInvert = consumer.start(fields, count); for(int i=0;i= maxFieldLength) { - if (docState.infoStream != null) - docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens"); - break; - } hasMoreTokens = stream.incrementToken(); } Index: lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java (revision 1060287) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java (working copy) @@ -35,7 +35,6 @@ public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException { this.docWriter = docWriter; docState = new DocumentsWriter.DocState(); - docState.maxFieldLength = docWriter.maxFieldLength; docState.infoStream = docWriter.infoStream; docState.similarity = docWriter.similarity; docState.docWriter = docWriter; Index: lucene/src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 1060287) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -127,7 +127,6 @@ private boolean aborting; // True if an abort is pending PrintStream infoStream; - int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH; Similarity similarity; // max # simultaneous threads; if there are more than @@ -140,7 +139,6 @@ static class DocState { DocumentsWriter docWriter; Analyzer analyzer; - int maxFieldLength; PrintStream infoStream; Similarity similarity; int docID; @@ -191,6 +189,7 @@ /** * Allocate bytes used from shared pool. */ + @Override protected byte[] newBuffer(int size) { assert size == PER_DOC_BLOCK_SIZE; return perDocAllocator.getByteBlock(); @@ -358,13 +357,6 @@ } } - synchronized void setMaxFieldLength(int maxFieldLength) { - this.maxFieldLength = maxFieldLength; - for(int i=0;i - * NOTE: by default, {@link IndexWriterConfig#getMaxFieldLength()} - * returns {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH}. Pay attention to - * whether this setting fits your application. * * @param d * the index directory. The index is either created or appended @@ -689,7 +686,6 @@ directory = d; analyzer = conf.getAnalyzer(); infoStream = defaultInfoStream; - maxFieldLength = conf.getMaxFieldLength(); termIndexInterval = conf.getTermIndexInterval(); mergePolicy = conf.getMergePolicy(); mergePolicy.setIndexWriter(this); @@ -768,7 +764,6 @@ docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes); docWriter.setInfoStream(infoStream); - docWriter.setMaxFieldLength(maxFieldLength); // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: @@ -987,6 +982,7 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ + @Override public void close() throws CorruptIndexException, IOException { close(true); } @@ -1177,26 +1173,8 @@ } /** - * The maximum number of terms that will be indexed for a single field in a - * document. This limits the amount of memory required for indexing, so that - * collections with very large files will not crash the indexing process by - * running out of memory.

- * Note that this effectively truncates large documents, excluding from the - * index terms that occur further in the document. If you know your source - * documents are large, be sure to set this value high enough to accommodate - * the expected size. If you set it to Integer.MAX_VALUE, then the only limit - * is your memory, but you should anticipate an OutOfMemoryError.

- * By default, no more than 10,000 terms will be indexed for a field. + * Adds a document to this index. * - * @see MaxFieldLength - */ - private int maxFieldLength; - - /** - * Adds a document to this index. If the document contains more than - * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, - * the remainder are discarded. - * *

Note that if an Exception is hit (for example disk full) * then the index will be consistent, but this document * may not have been added. Furthermore, it's possible @@ -1242,9 +1220,7 @@ /** * Adds a document to this index, using the provided analyzer instead of the - * value of {@link #getAnalyzer()}. If the document contains more than - * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are - * discarded. + * value of {@link #getAnalyzer()}. * *

See {@link #addDocument(Document)} for details on * index and IndexWriter state after an Exception, and Index: lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 1060287) +++ lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (working copy) @@ -41,8 +41,6 @@ */ public final class IndexWriterConfig implements Cloneable { - public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE; - /** * Specifies the open mode for {@link IndexWriter}: *

    @@ -55,7 +53,7 @@ public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND } /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */ - public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here + public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here /** Denotes a flush trigger is disabled. */ public final static int DISABLE_AUTO_FLUSH = -1; @@ -113,7 +111,6 @@ private IndexDeletionPolicy delPolicy; private IndexCommit commit; private OpenMode openMode; - private int maxFieldLength; private Similarity similarity; private int termIndexInterval; // TODO: this should be private to the codec, not settable here private MergeScheduler mergeScheduler; @@ -145,7 +142,6 @@ delPolicy = new KeepOnlyLastCommitDeletionPolicy(); commit = null; openMode = OpenMode.CREATE_OR_APPEND; - maxFieldLength = UNLIMITED_FIELD_LENGTH; similarity = Similarity.getDefault(); termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here mergeScheduler = new ConcurrentMergeScheduler(); @@ -220,37 +216,6 @@ } /** - * The maximum number of terms that will be indexed for a single field in a - * document. This limits the amount of memory required for indexing, so that - * collections with very large files will not crash the indexing process by - * running out of memory. This setting refers to the number of running terms, - * not to the number of different terms. - *

    - * NOTE: this silently truncates large documents, excluding from the - * index all terms that occur further in the document. If you know your source - * documents are large, be sure to set this value high enough to accomodate - * the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then - * the only limit is your memory, but you should anticipate an - * OutOfMemoryError. - *

    - * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}. - */ - public IndexWriterConfig setMaxFieldLength(int maxFieldLength) { - this.maxFieldLength = maxFieldLength; - return this; - } - - /** - * Returns the maximum number of terms that will be indexed for a single field - * in a document. - * - * @see #setMaxFieldLength(int) - */ - public int getMaxFieldLength() { - return maxFieldLength; - } - - /** * Expert: allows to open a certain commit point. The default is null which * opens the latest commit point. */ @@ -611,7 +576,6 @@ sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n"); sb.append("commit=").append(commit == null ? "null" : commit).append("\n"); sb.append("openMode=").append(openMode).append("\n"); - sb.append("maxFieldLength=").append(maxFieldLength).append("\n"); sb.append("similarity=").append(similarity.getClass().getName()).append("\n"); sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n"); Index: lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java =================================================================== --- lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (revision 1060287) +++ lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (working copy) @@ -84,8 +84,7 @@ } writer = new IndexWriter(FSDirectory.open(index), new IndexWriterConfig( Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT)) - .setMaxFieldLength(1000000).setOpenMode( - create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND)); + .setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND)); indexDocs(root, index, create); // add new docs System.out.println("Optimizing index...");