Index: src/test/org/apache/lucene/index/TermInfosTest.java =================================================================== --- src/test/org/apache/lucene/index/TermInfosTest.java (revision 156438) +++ src/test/org/apache/lucene/index/TermInfosTest.java (working copy) @@ -86,7 +86,7 @@ Directory store = FSDirectory.getDirectory("test.store", true); FieldInfos fis = new FieldInfos(); - TermInfosWriter writer = new TermInfosWriter(store, "words", fis); + TermInfosWriter writer = new TermInfosWriter(store, "words", fis, 128); fis.add("word", false); for (int i = 0; i < keys.size(); i++) Index: src/java/org/apache/lucene/index/TermInfosWriter.java =================================================================== --- src/java/org/apache/lucene/index/TermInfosWriter.java (revision 156438) +++ src/java/org/apache/lucene/index/TermInfosWriter.java (working copy) @@ -61,20 +61,22 @@ private TermInfosWriter other = null; - TermInfosWriter(Directory directory, String segment, FieldInfos fis) + TermInfosWriter(Directory directory, String segment, FieldInfos fis, + int interval) throws IOException { - initialize(directory, segment, fis, false); - other = new TermInfosWriter(directory, segment, fis, true); + initialize(directory, segment, fis, interval, false); + other = new TermInfosWriter(directory, segment, fis, interval, true); other.other = this; } private TermInfosWriter(Directory directory, String segment, FieldInfos fis, - boolean isIndex) throws IOException { - initialize(directory, segment, fis, isIndex); + int interval, boolean isIndex) throws IOException { + initialize(directory, segment, fis, interval, isIndex); } private void initialize(Directory directory, String segment, FieldInfos fis, - boolean isi) throws IOException { + int interval, boolean isi) throws IOException { + indexInterval = interval; fieldInfos = fis; isIndex = isi; output = directory.createOutput(segment + (isIndex ? ".tii" : ".tis")); Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 156438) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -103,6 +103,16 @@ "10000")); + /** The default value for {@link #getTermIndexInterval()}. This is + * determined by the org.apache.lucene.termIndexInterval system + * property. The default is 128. + */ + public static final int DEFAULT_TERM_INDEX_INTERVAL = + Integer.parseInt(System.getProperty("org.apache.lucene.termIndexInterval", + "128")); + + + private Directory directory; // where this index resides private Analyzer analyzer; // how to analyze text @@ -113,6 +123,8 @@ private Lock writeLock; + private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; + /** Use compound file setting. Defaults to true, minimizing the number of * files used. Setting this to false may improve indexing performance, but * may also cause file handle problems. @@ -154,6 +166,26 @@ return this.similarity; } + /** Expert: Set the interval between indexed terms. Large values cause less + * memory to be used by IndexReader, but slow random-access to terms. Small + * values cause more memory to be used by an IndexReader, and speed + * random-access to terms. In particular, + * numUniqueTerms/interval terms are read into memory by an + * IndexReader, and, on average, interval/2 terms must be + * scanned for each random term access. + * + * @see #DEFAULT_TERM_INDEX_INTERVAL + */ + public void setTermIndexInterval(int interval) { + this.termIndexInterval = interval; + } + + /** Expert: Return the interval between indexed terms. + * + * @see #setTermIndexInterval(int) + */ + public int getTermIndexInterval() { return termIndexInterval; } + /** * Constructs an IndexWriter for the index in path. * Text will be analyzed with a. If create @@ -359,6 +391,11 @@ } } + /** Returns the Directory used by this index. */ + public Directory getDirectory() { + return directory; + } + /** Returns the analyzer used by this index. */ public Analyzer getAnalyzer() { return analyzer; @@ -408,7 +445,7 @@ */ public void addDocument(Document doc, Analyzer analyzer) throws IOException { DocumentWriter dw = - new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength); + new DocumentWriter(ramDirectory, analyzer, this); dw.setInfoStream(infoStream); String segmentName = newSegmentName(); dw.addDocument(segmentName, doc); @@ -514,7 +551,7 @@ optimize(); // start with zero or 1 seg final String mergedName = newSegmentName(); - SegmentMerger merger = new SegmentMerger(directory, mergedName); + SegmentMerger merger = new SegmentMerger(this, mergedName); final Vector segmentsToDelete = new Vector(); IndexReader sReader = null; @@ -609,7 +646,7 @@ final String mergedName = newSegmentName(); if (infoStream != null) infoStream.print("merging segments"); SegmentMerger merger = - new SegmentMerger(directory, mergedName); + new SegmentMerger(this, mergedName); final Vector segmentsToDelete = new Vector(); for (int i = minSegment; i < segmentInfos.size(); i++) { Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 156438) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -39,6 +39,7 @@ final class SegmentMerger { private Directory directory; private String segment; + private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL; private Vector readers = new Vector(); private FieldInfos fieldInfos; @@ -51,7 +52,7 @@ "tvx", "tvd", "tvf" }; - /** + /** This ctor used only by test code. * * @param dir The Directory to merge the other segments into * @param name The name of the new segment @@ -61,6 +62,12 @@ segment = name; } + SegmentMerger(IndexWriter writer, String name) { + directory = writer.getDirectory(); + segment = name; + termIndexInterval = writer.getTermIndexInterval(); + } + /** * Add an IndexReader to the collection of readers that are to be merged * @param reader @@ -220,7 +227,8 @@ freqOutput = directory.createOutput(segment + ".frq"); proxOutput = directory.createOutput(segment + ".prx"); termInfosWriter = - new TermInfosWriter(directory, segment, fieldInfos); + new TermInfosWriter(directory, segment, fieldInfos, + termIndexInterval); skipInterval = termInfosWriter.skipInterval; queue = new SegmentMergeQueue(readers.size()); Index: src/java/org/apache/lucene/index/DocumentWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentWriter.java (revision 156438) +++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy) @@ -39,10 +39,11 @@ private Similarity similarity; private FieldInfos fieldInfos; private int maxFieldLength; + private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL; private PrintStream infoStream; - /** - * + /** This ctor used by test code only. + * * @param directory The directory to write the document information to * @param analyzer The analyzer to use for the document * @param similarity The Similarity function @@ -56,6 +57,14 @@ this.maxFieldLength = maxFieldLength; } + DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer) { + this.directory = directory; + this.analyzer = analyzer; + this.similarity = writer.getSimilarity(); + this.maxFieldLength = writer.getMaxFieldLength(); + this.termIndexInterval = writer.getTermIndexInterval(); + } + final void addDocument(String segment, Document doc) throws IOException { // write field names @@ -295,7 +304,8 @@ //open files for inverse index storage freq = directory.createOutput(segment + ".frq"); prox = directory.createOutput(segment + ".prx"); - tis = new TermInfosWriter(directory, segment, fieldInfos); + tis = new TermInfosWriter(directory, segment, fieldInfos, + termIndexInterval); TermInfo ti = new TermInfo(); String currentField = null;