Index: lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (working copy) @@ -67,7 +67,7 @@ // ignore deletions CachingSpanFilter filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.IGNORE); - + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); ConstantScoreQuery constantScore = new ConstantScoreQuery(filter); @@ -97,7 +97,7 @@ reader = refreshReader(reader); searcher.close(); searcher = newSearcher(reader); - + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); Index: lucene/src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDoc.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -111,7 +111,7 @@ public void testIndexAndMerge() throws Exception { StringWriter sw = new StringWriter(); PrintWriter out = new PrintWriter(sw, true); - + Directory directory = newFSDirectory(indexDir); IndexWriter writer = new IndexWriter( directory, @@ -136,7 +136,7 @@ SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false); printSegment(out, siMerge3); - + directory.close(); out.close(); sw.close(); @@ -170,7 +170,7 @@ siMerge3 = merge(siMerge, siMerge2, "merge3", true); printSegment(out, siMerge3); - + directory.close(); out.close(); sw.close(); @@ -207,11 +207,11 @@ final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, false, fieldInfos.hasProx(), merger.getSegmentCodecs(), fieldInfos.hasVectors(), fieldInfos); - + if (useCompoundFile) { Collection filesToDelete = merger.createCompoundFile(merged + ".cfs", info); info.setUseCompoundFile(true); - for (final String fileToDelete : filesToDelete) + for (final String fileToDelete : filesToDelete) si1.dir.deleteFile(fileToDelete); } Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (working copy) @@ -34,7 +34,7 @@ * implemented properly */ public class TestIndexReaderClone extends LuceneTestCase { - + public void testCloneReadOnlySegmentReader() throws Exception { final Directory dir1 = newDirectory(); @@ -67,7 +67,7 @@ r2.close(); dir1.close(); } - + // open non-readOnly reader1, clone to non-readOnly // reader2, make sure we can change reader1 public void testCloneWriteToOrig() throws Exception { @@ -83,7 +83,7 @@ r2.close(); dir1.close(); } - + // open non-readOnly reader1, clone to non-readOnly // reader2, make sure we can change reader2 public void testCloneWriteToClone() throws Exception { @@ -105,7 +105,7 @@ dir1.close(); } - + // create single-segment index, open non-readOnly // SegmentReader, add docs, reopen to multireader, then do // delete @@ -116,7 +116,7 @@ IndexReader reader1 = IndexReader.open(dir1, false); TestIndexReaderReopen.modifyIndex(5, dir1); - + IndexReader reader2 = reader1.reopen(); assertTrue(reader1 != reader2); @@ -208,7 +208,7 @@ reader2.close(); dir1.close(); } 
- + private static boolean deleteWorked(int doc, IndexReader r) { boolean exception = false; try { @@ -219,7 +219,7 @@ } return !exception; } - + public void testCloneReadOnlyDirectoryReader() throws Exception { final Directory dir1 = newDirectory(); @@ -268,7 +268,7 @@ * are not the same on each reader 5. Verify the doc deleted is only in the * cloned reader 6. Try to delete a document in the original reader, an * exception should be thrown - * + * * @param r1 IndexReader to perform tests on * @throws Exception */ @@ -323,7 +323,7 @@ // need to test norms? dir1.close(); } - + public void testSegmentReaderCloseReferencing() throws Exception { final Directory dir1 = newDirectory(); TestIndexReaderReopen.createIndex(random, dir1, false); @@ -343,7 +343,7 @@ clonedSegmentReader.close(); dir1.close(); } - + public void testSegmentReaderDelDocsReferenceCounting() throws Exception { final Directory dir1 = newDirectory(); TestIndexReaderReopen.createIndex(random, dir1, false); @@ -454,16 +454,16 @@ private void assertDelDocsRefCountEquals(int refCount, SegmentReader reader) { assertEquals(refCount, reader.deletedDocsRef.get()); } - + public void testCloneSubreaders() throws Exception { final Directory dir1 = newDirectory(); - + TestIndexReaderReopen.createIndex(random, dir1, true); IndexReader reader = IndexReader.open(dir1, false); reader.deleteDocument(1); // acquire write lock IndexReader[] subs = reader.getSequentialSubReaders(); assert subs.length > 1; - + IndexReader[] clones = new IndexReader[subs.length]; for (int x=0; x < subs.length; x++) { clones[x] = (IndexReader) subs[x].clone(); @@ -483,9 +483,9 @@ IndexReader r2 = r1.clone(false); r1.deleteDocument(5); r1.decRef(); - + r1.incRef(); - + r2.close(); r1.decRef(); r1.close(); Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReader.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -60,13 +60,13 @@ public class TestIndexReader extends LuceneTestCase { - + public void testCommitUserData() throws Exception { Directory d = newDirectory(); Map commitUserData = new HashMap(); commitUserData.put("foo", "fighters"); - + // set up writer IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) @@ -74,12 +74,12 @@ for(int i=0;i<27;i++) addDocumentWithFields(writer); writer.close(); - + IndexReader r = IndexReader.open(d, false); r.deleteDocument(5); r.flush(commitUserData); r.close(); - + SegmentInfos sis = new SegmentInfos(); sis.read(d); IndexReader r2 = IndexReader.open(d, false); @@ -115,10 +115,10 @@ r3.close(); d.close(); } - + public void testIsCurrent() throws Exception { Directory d = newDirectory(); - IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); writer.close(); @@ -205,7 +205,7 @@ doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); } - + writer.close(); // verify fields again reader = IndexReader.open(d, false); @@ -224,10 +224,10 @@ assertTrue(fieldNames.contains("tvposition")); assertTrue(fieldNames.contains("tvoffset")); assertTrue(fieldNames.contains("tvpositionoffset")); - + // verify that only indexed fields were 
returned fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED); - assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields + assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields assertTrue(fieldNames.contains("keyword")); assertTrue(fieldNames.contains("text")); assertTrue(fieldNames.contains("unstored")); @@ -239,26 +239,26 @@ assertTrue(fieldNames.contains("tvposition")); assertTrue(fieldNames.contains("tvoffset")); assertTrue(fieldNames.contains("tvpositionoffset")); - + // verify that only unindexed fields were returned fieldNames = reader.getFieldNames(IndexReader.FieldOption.UNINDEXED); assertEquals(2, fieldNames.size()); // the following fields assertTrue(fieldNames.contains("unindexed")); assertTrue(fieldNames.contains("unindexed2")); - - // verify index term vector fields + + // verify index term vector fields fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR); assertEquals(1, fieldNames.size()); // 1 field has term vector only assertTrue(fieldNames.contains("termvector")); - + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION); assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors assertTrue(fieldNames.contains("tvposition")); - + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET); assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors assertTrue(fieldNames.contains("tvoffset")); - + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET); assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors assertTrue(fieldNames.contains("tvpositionoffset")); @@ -366,13 +366,13 @@ reader2.close(); dir.close(); } - + public void testBinaryFields() throws IOException { Directory dir = newDirectory(); byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); - + for (int i = 0; i < 10; i++) { addDoc(writer, "document number " + (i + 1)); addDocumentWithFields(writer); @@ -589,14 +589,14 @@ reader = IndexReader.open(dir, false); reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); reader.close(); - + // now open reader again & set norm for doc 0 (writes to _0_2.s0) reader = IndexReader.open(dir, false); reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); reader.close(); assertFalse("failed to remove first generation norms file on writing second generation", dir.fileExists("_0_1.s0")); - + dir.close(); } @@ -619,7 +619,7 @@ } rmDir(fileDirName); }*/ - + public void testDeleteReaderWriterConflictOptimized() throws IOException{ deleteReaderWriterConflict(true); } @@ -802,7 +802,7 @@ // expected exception } try { - IndexWriter.unlock(dir); // this should not be done in the real world! + IndexWriter.unlock(dir); // this should not be done in the real world! 
} catch (LockReleaseFailedException lrfe) { writer.close(); } @@ -866,7 +866,7 @@ public void testDeleteReaderReaderConflictUnoptimized() throws IOException{ deleteReaderReaderConflict(false); } - + public void testDeleteReaderReaderConflictOptimized() throws IOException{ deleteReaderReaderConflict(true); } @@ -880,7 +880,7 @@ Term searchTerm = new Term("content", "aaa"); int START_COUNT = 157; int END_COUNT = 144; - + // First build up a starting index: MockDirectoryWrapper startDir = newDirectory(); IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); @@ -1066,7 +1066,7 @@ } public void testDocsOutOfOrderJIRA140() throws IOException { - Directory dir = newDirectory(); + Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); for(int i=0;i<11;i++) { addDoc(writer, "aaa"); @@ -1106,7 +1106,7 @@ public void testExceptionReleaseWriteLockJIRA768() throws IOException { - Directory dir = newDirectory(); + Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDoc(writer, "aaa"); writer.close(); @@ -1157,7 +1157,7 @@ } catch (FileNotFoundException e) { // expected } - + dir.close(); } @@ -1315,10 +1315,10 @@ doc.add(newField("tvoffset","tvoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.add(newField("tvposition","tvposition", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - + writer.addDocument(doc); } - + private void addDoc(IndexWriter writer, String value) throws IOException { Document doc = new Document(); doc.add(newField("content", value, Field.Store.NO, Field.Index.ANALYZED)); @@ -1330,7 +1330,7 @@ assertEquals("IndexReaders have different values for maxDoc.", index1.maxDoc(), index2.maxDoc()); assertEquals("Only one IndexReader has deletions.", index1.hasDeletions(), index2.hasDeletions()); assertEquals("Only one index is optimized.", index1.isOptimized(), index2.isOptimized()); - + // check field names Collection fields1 = index1.getFieldNames(FieldOption.ALL); Collection fields2 = index1.getFieldNames(FieldOption.ALL); @@ -1340,7 +1340,7 @@ while (it1.hasNext()) { assertEquals("Different field names.", it1.next(), it2.next()); } - + // check norms it1 = fields1.iterator(); while (it1.hasNext()) { @@ -1359,7 +1359,7 @@ assertSame(norms1, norms2); } } - + // check deletions final Bits delDocs1 = MultiFields.getDeletedDocs(index1); final Bits delDocs2 = MultiFields.getDeletedDocs(index2); @@ -1368,7 +1368,7 @@ delDocs1 == null || delDocs1.get(i), delDocs2 == null || delDocs2.get(i)); } - + // check stored fields for (int i = 0; i < index1.maxDoc(); i++) { if (delDocs1 == null || !delDocs1.get(i)) { @@ -1384,10 +1384,10 @@ Field curField2 = (Field) itField2.next(); assertEquals("Different fields names for doc " + i + ".", curField1.name(), curField2.name()); assertEquals("Different field values for doc " + i + ".", curField1.stringValue(), curField2.stringValue()); - } + } } } - + // check dictionary and posting lists FieldsEnum fenum1 = MultiFields.getFields(index1).iterator(); FieldsEnum fenum2 = MultiFields.getFields(index1).iterator(); @@ -1467,7 +1467,7 @@ r.close(); r2.close(); d.close(); - } + } public void testReadOnly() throws Throwable { 
Directory d = newDirectory(); @@ -1518,7 +1518,7 @@ IndexReader r3 = r2.reopen(); assertFalse(r3 == r2); r2.close(); - + assertFalse(r == r2); try { @@ -1602,7 +1602,7 @@ public void testNoDupCommitFileNames() throws Throwable { Directory dir = newDirectory(); - + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2)); @@ -1610,12 +1610,12 @@ writer.addDocument(createDocument("a")); writer.addDocument(createDocument("a")); writer.close(); - + Collection commits = IndexReader.listCommits(dir); for (final IndexCommit commit : commits) { Collection files = commit.getFileNames(); HashSet seen = new HashSet(); - for (final String fileName : files) { + for (final String fileName : files) { assertTrue("file " + fileName + " was duplicated", !seen.contains(fileName)); seen.add(fileName); } @@ -1820,7 +1820,7 @@ // LUCENE-2046 public void testPrepareCommitIsCurrent() throws Throwable { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.commit(); Document doc = new Document(); @@ -1838,12 +1838,12 @@ r.close(); dir.close(); } - + // LUCENE-2753 public void testListCommits() throws Exception { Directory dir = newDirectory(); SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, null).setIndexDeletionPolicy(sdp)); writer.addDocument(new Document()); writer.commit(); Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy) @@ -201,7 +201,7 @@ Map docs = new HashMap(); IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE) - .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates) + .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates)) .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy())); w.setInfoStream(VERBOSE ? System.out : null); LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); Index: lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (working copy) @@ -33,7 +33,7 @@ } writer.commit(); } - + private static IndexWriterConfig newWriterConfig() throws IOException { IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); @@ -42,7 +42,7 @@ conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); return conf; } - + public void testByteSizeLimit() throws Exception { // tests that the max merge size constraint is applied during optimize. 
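Editor's sketch: the TestStressIndexing2 hunk above carries one of the few substantive API changes in this patch, the removed IndexWriterConfig.setMaxThreadStates(int) call is replaced by setIndexerThreadPool(...). Below is the same call chain laid out one setter per line so the change is easier to see; dir, maxBufferedDocs, maxThreadStates and doReaderPooling are the test's own locals, imports elided.

    // Old (removed by this patch):
    //   .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates)
    // New: the number of indexing thread states is supplied via a
    // ThreadAffinityDocumentsWriterThreadPool instance instead of a plain int.
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setOpenMode(OpenMode.CREATE)
        .setRAMBufferSizeMB(0.1)
        .setMaxBufferedDocs(maxBufferedDocs)
        .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates))
        .setReaderPooling(doReaderPooling)
        .setMergePolicy(newLogMergePolicy());
    IndexWriter w = new MockIndexWriter(dir, conf);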
Directory dir = new RAMDirectory(); @@ -65,7 +65,7 @@ LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); lmp.setMaxMergeMBForOptimize((min + 1) / (1 << 20)); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); @@ -91,14 +91,14 @@ addDocs(writer, 3); addDocs(writer, 3); addDocs(writer, 3); - + writer.close(); conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(3); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); @@ -119,14 +119,14 @@ addDocs(writer, 3); addDocs(writer, 3); addDocs(writer, 5); - + writer.close(); conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(3); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); @@ -135,120 +135,120 @@ sis.read(dir); assertEquals(2, sis.size()); } - + public void testFirstSegmentTooLarge() throws Exception { Directory dir = new RAMDirectory(); - + IndexWriterConfig conf = newWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); - + addDocs(writer, 5); addDocs(writer, 3); addDocs(writer, 3); addDocs(writer, 3); - + writer.close(); - + conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(3); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); - + SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(2, sis.size()); } - + public void testAllSegmentsSmall() throws Exception { Directory dir = new RAMDirectory(); - + IndexWriterConfig conf = newWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); - + addDocs(writer, 3); addDocs(writer, 3); addDocs(writer, 3); addDocs(writer, 3); - + writer.close(); - + conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(3); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); - + SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(1, sis.size()); } - + public void testAllSegmentsLarge() throws Exception { Directory dir = new RAMDirectory(); - + IndexWriterConfig conf = newWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); - + addDocs(writer, 3); addDocs(writer, 3); addDocs(writer, 3); - + writer.close(); - + conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(2); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); - + SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(3, sis.size()); } - + public void testOneLargeOneSmall() throws Exception { Directory dir = new RAMDirectory(); - + IndexWriterConfig conf = newWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); - + addDocs(writer, 3); addDocs(writer, 5); addDocs(writer, 3); addDocs(writer, 5); - + writer.close(); - + conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(3); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); - + SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(4, sis.size()); } - + public void testMergeFactor() throws Exception { Directory dir = new RAMDirectory(); - + IndexWriterConfig conf = newWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); - + addDocs(writer, 3); addDocs(writer, 3); addDocs(writer, 3); @@ -256,78 +256,78 @@ addDocs(writer, 
5); addDocs(writer, 3); addDocs(writer, 3); - + writer.close(); - + conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(3); lmp.setMergeFactor(2); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); - + // Should only be 4 segments in the index, because of the merge factor and // max merge docs settings. SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(4, sis.size()); } - + public void testSingleNonOptimizedSegment() throws Exception { Directory dir = new RAMDirectory(); - + IndexWriterConfig conf = newWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); - + addDocs(writer, 3); addDocs(writer, 5); addDocs(writer, 3); - + writer.close(); - + // delete the last document, so that the last segment is optimized. IndexReader r = IndexReader.open(dir, false); r.deleteDocument(r.numDocs() - 1); r.close(); - + conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(3); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); - + // Verify that the last segment does not have deletions. SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(3, sis.size()); assertFalse(sis.info(2).hasDeletions()); } - + public void testSingleOptimizedSegment() throws Exception { Directory dir = new RAMDirectory(); - + IndexWriterConfig conf = newWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); - + addDocs(writer, 3); - + writer.close(); - + conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(3); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); - + // Verify that the last segment does not have deletions. SegmentInfos sis = new SegmentInfos(); sis.read(dir); @@ -336,28 +336,28 @@ public void testSingleNonOptimizedTooLargeSegment() throws Exception { Directory dir = new RAMDirectory(); - + IndexWriterConfig conf = newWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); - + addDocs(writer, 5); - + writer.close(); - + // delete the last document IndexReader r = IndexReader.open(dir, false); r.deleteDocument(r.numDocs() - 1); r.close(); - + conf = newWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.setMaxMergeDocs(2); conf.setMergePolicy(lmp); - + writer = new IndexWriter(dir, conf); writer.optimize(); writer.close(); - + // Verify that the last segment does not have deletions. 
SegmentInfos sis = new SegmentInfos(); sis.read(dir); Index: lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (working copy) @@ -121,7 +121,7 @@ fieldInfos = new FieldInfos(dir, IndexFileNames.segmentFileName(seg, "", IndexFileNames.FIELD_INFOS_EXTENSION)); } - + @Override public void tearDown() throws Exception { dir.close(); @@ -130,17 +130,17 @@ private class MyTokenStream extends TokenStream { int tokenUpto; - + CharTermAttribute termAtt; PositionIncrementAttribute posIncrAtt; OffsetAttribute offsetAtt; - + public MyTokenStream() { termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); } - + @Override public boolean incrementToken() { if (tokenUpto >= tokens.length) Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -148,8 +148,8 @@ writer.addDocument(doc); } - + public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { String[] startFiles = dir.listAll(); SegmentInfos infos = new SegmentInfos(); @@ -262,7 +262,7 @@ if (VERBOSE) { System.out.println("TEST: config1=" + writer.getConfig()); } - + for(int j=0;j<500;j++) { addDocWithIndex(writer, j); } @@ -338,7 +338,7 @@ assertEquals("should be one document", reader2.numDocs(), 1); reader.close(); reader2.close(); - + dir.close(); } @@ -367,14 +367,14 @@ * these docs until writer is closed. */ public void testCommitOnClose() throws IOException { - Directory dir = newDirectory(); + Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < 14; i++) { addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("first number of hits", 14, hits.length); @@ -415,14 +415,14 @@ * and add docs to it. 
*/ public void testCommitOnCloseAbort() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); for (int i = 0; i < 14; i++) { addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("first number of hits", 14, hits.length); @@ -450,7 +450,7 @@ hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("saw changes after writer.abort", 14, hits.length); searcher.close(); - + // Now make sure we can re-open the index, add docs, // and all is good: writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) @@ -567,7 +567,7 @@ * and close(). */ public void testCommitOnCloseOptimize() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); // Must disable throwing exc on double-write: this // test uses IW.rollback which easily results in // writing to same file more than once @@ -634,7 +634,7 @@ } public void testIndexNoDocuments() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.commit(); writer.close(); @@ -656,7 +656,7 @@ } public void testManyFields() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); for(int j=0;j<100;j++) { Document doc = new Document(); @@ -686,7 +686,7 @@ } public void testSmallRAMBuffer() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
@@ -782,13 +782,14 @@ writer.deleteDocuments(new Term("field", "aaa" + j)); _TestUtil.syncConcurrentMerges(writer); int flushCount = writer.getFlushCount(); + if (j == 1) lastFlushCount = flushCount; else if (j < 10) { // No new files should be created assertEquals(flushCount, lastFlushCount); } else if (10 == j) { - assertTrue(flushCount > lastFlushCount); + assertTrue("" + j, flushCount > lastFlushCount); lastFlushCount = flushCount; writer.getConfig().setRAMBufferSizeMB(0.000001); writer.getConfig().setMaxBufferedDeleteTerms(1); @@ -825,7 +826,7 @@ } public void testDiverseDocs() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5)); for(int i=0;i<3;i++) { // First, docs where every term is unique (heavy on @@ -872,12 +873,12 @@ } public void testEnablingNorms() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); // Enable norms for only 1 doc, pre flush for(int j=0;j<10;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); if (j != 8) { f.setOmitNorms(true); } @@ -898,7 +899,7 @@ // Enable norms for only 1 doc, post flush for(int j=0;j<27;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); if (j != 26) { f.setOmitNorms(true); } @@ -918,7 +919,7 @@ } public void testHighFreqTerm() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01)); // Massive doc that has 128 K a's @@ -968,7 +969,7 @@ return myLockFactory.makeLock(name); } } - + Directory dir = new MyRAMDirectory(new RAMDirectory()); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); @@ -976,7 +977,7 @@ addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("did not get right number of hits", 100, hits.length); @@ -1073,7 +1074,7 @@ infos.read(dir); assertEquals(2, infos.size()); } - } + } dir.close(); } @@ -1089,7 +1090,7 @@ Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); - + Document document = new Document(); document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES)); iw.addDocument(document); @@ -1343,7 +1344,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1370,12 +1371,12 @@ writer.close(); dir.close(); } - + // LUCENE-325: test expungeDeletes, when 2 singular merges // are required public void testExpungeDeletes() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, 
newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setRAMBufferSizeMB( IndexWriterConfig.DISABLE_AUTO_FLUSH)); @@ -1537,14 +1538,14 @@ public void doAfterFlush() { afterWasCalled = true; } - + @Override protected void doBeforeFlush() throws IOException { beforeWasCalled = true; } } - + // LUCENE-1222 public void testDoBeforeAfterFlush() throws IOException { Directory dir = newDirectory(); @@ -1572,7 +1573,7 @@ } - + final String[] utf8Data = new String[] { // unpaired low surrogate "ab\udc17cd", "ab\ufffdcd", @@ -1642,7 +1643,7 @@ } UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8); - + String s1 = new String(chars, 0, len); String s2 = new String(utf8.bytes, 0, utf8.length, "UTF-8"); assertEquals("codepoint " + ch, s1, s2); @@ -1699,7 +1700,7 @@ expected[i++] = 0xfffd; expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); hasIllegal = true; - } else + } else expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); } else { expected[i] = buffer[i] = ' '; @@ -1796,10 +1797,10 @@ final TokenStream tokens = new TokenStream() { final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - + final Iterator terms = Arrays.asList("a","b","c").iterator(); boolean first = true; - + @Override public boolean incrementToken() { if (!terms.hasNext()) return false; @@ -1856,7 +1857,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1912,7 +1913,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1979,7 +1980,7 @@ byte[] b = new byte[50]; for(int i=0;i<50;i++) b[i] = (byte) (i+77); - + Document doc = new Document(); Field f = new Field("binary", b, 10, 17); byte[] bx = f.getBinaryValue(); @@ -2016,7 +2017,7 @@ // commit(Map) never called for this index assertEquals(0, r.getCommitUserData().size()); r.close(); - + w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); for(int j=0;j<17;j++) addDoc(w); @@ -2024,7 +2025,7 @@ data.put("label", "test1"); w.commit(data); w.close(); - + assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); r = IndexReader.open(dir, true); @@ -2036,7 +2037,7 @@ w.close(); assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); - + dir.close(); } @@ -2046,7 +2047,7 @@ Directory dir = newDirectory(); MockAnalyzer analyzer = new MockAnalyzer(random); analyzer.setPositionIncrementGap( 100 ); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); Field f = newField("field", "", Field.Store.NO, @@ -2073,7 +2074,7 @@ // LUCENE-1468 -- make sure opening an IndexWriter with // create=true does not remove non-index files - + public void testOtherFiles() throws Throwable { Directory dir = newDirectory(); try { @@ -2132,7 +2133,7 @@ @Override public void run() { // LUCENE-2239: won't work with NIOFS/MMAP - Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); + Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); IndexWriter w = null; while(!finish) { try { @@ -2141,7 +2142,7 @@ if (w != null) { w.close(); } - IndexWriterConfig conf = newIndexWriterConfig( + IndexWriterConfig conf = 
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2); w = new IndexWriter(dir, conf); @@ -2208,10 +2209,10 @@ e.printStackTrace(System.out); } } - try { + try { dir.close(); - } catch (IOException e) { - throw new RuntimeException(e); + } catch (IOException e) { + throw new RuntimeException(e); } } } @@ -2226,7 +2227,7 @@ // interrupt arrives while class loader is trying to // init this class (in servicing a first interrupt): assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException); - + // issue 100 interrupts to child thread int i = 0; while(i < 100) { @@ -2260,12 +2261,12 @@ doc.add(f); doc.add(f2); w.addDocument(doc); - + // add 2 docs to test in-memory merging f.setTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false)); f2.setTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false)); w.addDocument(doc); - + // force segment flush so we can force a segment merge with doc3 later. w.commit(); @@ -2288,7 +2289,7 @@ assertTrue(ir.document(0).getFieldable("binary").isBinary()); assertTrue(ir.document(1).getFieldable("binary").isBinary()); assertTrue(ir.document(2).getFieldable("binary").isBinary()); - + assertEquals("value", ir.document(0).get("string")); assertEquals("value", ir.document(1).get("string")); assertEquals("value", ir.document(2).get("string")); @@ -2359,7 +2360,7 @@ public void testNoDocsIndex() throws Throwable { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); writer.setInfoStream(new PrintStream(bos)); @@ -2369,7 +2370,7 @@ _TestUtil.checkIndex(dir); dir.close(); } - + // LUCENE-2095: make sure with multiple threads commit // doesn't return until all changes are in fact in the // index @@ -2377,7 +2378,7 @@ final int NUM_THREADS = 5; final double RUN_SEC = 0.5; final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( + final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); _TestUtil.reduceOpenFiles(w.w); w.commit(); @@ -2562,7 +2563,7 @@ Field f = newField("field", s.toString(), Field.Store.NO, Field.Index.ANALYZED); d.add(f); w.addDocument(d); - + IndexReader r = w.getReader().getSequentialSubReaders()[0]; TermsEnum t = r.fields().terms("field").iterator(); int count = 0; @@ -2648,10 +2649,10 @@ // in case a deletion policy which holds onto commits is used. Directory dir = newDirectory(); SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setIndexDeletionPolicy(sdp)); - + // First commit Document doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); @@ -2661,7 +2662,7 @@ // Keep that commit sdp.snapshot("id"); - + // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. 
doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); @@ -2673,25 +2674,13 @@ sdp.release("id"); writer.deleteUnusedFiles(); assertEquals(1, IndexReader.listCommits(dir).size()); - + writer.close(); dir.close(); } - - private static class FlushCountingIndexWriter extends IndexWriter { - int flushCount; - public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException { - super(dir, iwc); - } - @Override - public void doAfterFlush() { - flushCount++; - } - } public void testIndexingThenDeleting() throws Exception { final Random r = random; - Directory dir = newDirectory(); // note this test explicitly disables payloads final Analyzer analyzer = new Analyzer() { @@ -2700,7 +2689,7 @@ return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); } }; - FlushCountingIndexWriter w = new FlushCountingIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(-1).setMaxBufferedDeleteTerms(-1)); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH)); w.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); doc.add(newField("field", "go 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", Field.Store.NO, Field.Index.ANALYZED)); @@ -2714,15 +2703,15 @@ } if (doIndexing) { // Add docs until a flush is triggered - final int startFlushCount = w.flushCount; - while(w.flushCount == startFlushCount) { + final int startFlushCount = w.getFlushCount(); + while(w.getFlushCount() == startFlushCount) { w.addDocument(doc); count++; } } else { // Delete docs until a flush is triggered - final int startFlushCount = w.flushCount; - while(w.flushCount == startFlushCount) { + final int startFlushCount = w.getFlushCount(); + while(w.getFlushCount() == startFlushCount) { w.deleteDocuments(new Term("foo", ""+count)); count++; } @@ -2732,7 +2721,7 @@ w.close(); dir.close(); } - + public void testNoCommits() throws Exception { // Tests that if we don't call commit(), the directory has 0 commits. This has // changed since LUCENE-2386, where before IW would always commit on a fresh @@ -2753,7 +2742,7 @@ public void testEmptyFSDirWithNoLock() throws Exception { // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF), - // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed + // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed // when listAll() was called in IndexFileDeleter. Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory()); new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); @@ -2762,10 +2751,10 @@ public void testEmptyDirRollback() throws Exception { // Tests that if IW is created over an empty Directory, some documents are - // indexed, flushed (but not committed) and then IW rolls back, then no + // indexed, flushed (but not committed) and then IW rolls back, then no // files are left in the Directory. 
Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); String[] files = dir.listAll(); @@ -2789,7 +2778,7 @@ writer.addDocument(doc); // Adding just one document does not call flush yet. assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length); - + doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); @@ -2810,17 +2799,17 @@ public void testNoSegmentFile() throws IOException { Directory dir = newDirectory(); dir.setLockFactory(NoLockFactory.getNoLockFactory()); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); - + Document doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); w.addDocument(doc); w.addDocument(doc); - IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2) .setOpenMode(OpenMode.CREATE)); - + w2.close(); // If we don't do that, the test fails on Windows w.rollback(); @@ -2859,7 +2848,7 @@ w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).setIndexCommit(commit)); assertEquals(1, w.numDocs()); - + // commit IndexWriter to "third" w.addDocument(doc); commitData.put("tag", "third"); @@ -2914,7 +2903,7 @@ } final int docCount = 200*RANDOM_MULTIPLIER; final int fieldCount = _TestUtil.nextInt(rand, 1, 5); - + final List fieldIDs = new ArrayList(); Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); @@ -2924,7 +2913,7 @@ } final Map docs = new HashMap(); - + if (VERBOSE) { System.out.println("TEST: build index docCount=" + docCount); } @@ -3111,7 +3100,7 @@ Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random, dir, new StringSplitAnalyzer()); - char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8]; + char[] chars = new char[DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8]; Arrays.fill(chars, 'x'); Document doc = new Document(); final String bigTerm = new String(chars); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (working copy) @@ -223,8 +223,9 @@ threads[i].join(); for(int i=0;i dif = difFiles(files, files2); - + if (!Arrays.equals(files, files2)) { fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.length-files.length) + " files but only deleted " + (filesPre.length - files2.length) + "; expected files:\n " + asString(files) + "\n actual files:\n " + asString(files2)+"\ndif: "+dif); } @@ -172,7 +172,7 @@ Set set1 = new HashSet(); Set set2 = new HashSet(); Set extra = new HashSet(); - + for (int x=0; x < files1.length; x++) { set1.add(files1[x]); } @@ -195,7 +195,7 @@ } return extra; } - + private String 
asString(String[] l) { String s = ""; for(int i=0;i getters = new HashSet(); getters.add("getAnalyzer"); @@ -91,7 +96,11 @@ getters.add("getMergePolicy"); getters.add("getMaxThreadStates"); getters.add("getReaderPooling"); + getters.add("getIndexerThreadPool"); getters.add("getReaderTermsIndexDivisor"); + getters.add("getFlushPolicy"); + getters.add("getRAMPerThreadHardLimitMB"); + for (Method m : IndexWriterConfig.class.getDeclaredMethods()) { if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("get")) { assertTrue("method " + m.getName() + " is not tested for defaults", getters.contains(m.getName())); @@ -107,12 +116,12 @@ if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("set") && !Modifier.isStatic(m.getModifiers())) { - assertEquals("method " + m.getName() + " does not return IndexWriterConfig", + assertEquals("method " + m.getName() + " does not return IndexWriterConfig", IndexWriterConfig.class, m.getReturnType()); } } } - + @Test public void testConstants() throws Exception { // Tests that the values of the constants does not change @@ -126,7 +135,7 @@ assertEquals(8, IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); assertEquals(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR); } - + @Test public void testToString() throws Exception { String str = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).toString(); @@ -143,15 +152,15 @@ assertTrue(f.getName() + " not found in toString", str.indexOf(f.getName()) != -1); } } - + @Test public void testClone() throws Exception { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); IndexWriterConfig clone = (IndexWriterConfig) conf.clone(); - + // Clone is shallow since not all parameters are cloneable. 
assertTrue(conf.getIndexDeletionPolicy() == clone.getIndexDeletionPolicy()); - + conf.setMergeScheduler(new SerialMergeScheduler()); assertEquals(ConcurrentMergeScheduler.class, clone.getMergeScheduler().getClass()); } @@ -159,14 +168,14 @@ @Test public void testInvalidValues() throws Exception { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); - + // Test IndexDeletionPolicy assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(null)); assertEquals(SnapshotDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); conf.setIndexDeletionPolicy(null); assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); - + // Test MergeScheduler assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); conf.setMergeScheduler(new SerialMergeScheduler()); @@ -183,12 +192,12 @@ assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider()); // Test IndexingChain - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); conf.setIndexingChain(new MyIndexingChain()); assertEquals(MyIndexingChain.class, conf.getIndexingChain().getClass()); conf.setIndexingChain(null); - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); - + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); + try { conf.setMaxBufferedDeleteTerms(0); fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); @@ -239,12 +248,26 @@ // this is expected } + try { + conf.setRAMPerThreadHardLimitMB(2048); + fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048"); + } catch (IllegalArgumentException e) { + // this is expected + } + + try { + conf.setRAMPerThreadHardLimitMB(0); + fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0"); + } catch (IllegalArgumentException e) { + // this is expected + } + assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); - conf.setMaxThreadStates(5); + conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(5)); assertEquals(5, conf.getMaxThreadStates()); - conf.setMaxThreadStates(0); + conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(0)); assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); - + // Test MergePolicy assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); conf.setMergePolicy(new LogDocMergePolicy()); Index: lucene/src/test/org/apache/lucene/index/TestCodecs.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestCodecs.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestCodecs.java (working copy) @@ -381,7 +381,7 @@ this.register(new MockSepCodec()); this.setDefaultFieldCodec("MockSep"); } - + } private class Verify extends Thread { Index: lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (working copy) @@ -50,7 +50,7 @@ boolean isClose = false; StackTraceElement[] trace = new 
Exception().getStackTrace(); for (int i = 0; i < trace.length; i++) { - if ("doFlush".equals(trace[i].getMethodName())) { + if ("flush".equals(trace[i].getMethodName())) { isDoFlush = true; } if ("close".equals(trace[i].getMethodName())) { Index: lucene/src/test/org/apache/lucene/index/TestByteSlices.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestByteSlices.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestByteSlices.java (working copy) @@ -39,7 +39,7 @@ starts[stream] = -1; counters[stream] = 0; } - + int num = 10000 * RANDOM_MULTIPLIER; for (int iter = 0; iter < num; iter++) { int stream = random.nextInt(NUM_STREAM); @@ -67,7 +67,7 @@ if (VERBOSE) System.out.println(" addr now " + uptos[stream]); } - + for(int stream=0;stream stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); assertTrue(stored != null); //System.out.println("stored size: " + stored.size()); assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3); - + TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); assertTrue(vector != null); BytesRef [] terms = vector.getTerms(); @@ -116,7 +115,7 @@ assertTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); assertTrue(vector instanceof TermPositionVector == true); - + for (int i = 0; i < terms.length; i++) { String term = terms[i].utf8ToString(); int freq = freqs[i]; @@ -127,5 +126,5 @@ TestSegmentReader.checkNorms(mergedReader); mergedReader.close(); - } + } } Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (working copy) @@ -42,7 +42,7 @@ import org.apache.lucene.util._TestUtil; public class TestAddIndexes extends LuceneTestCase { - + public void testSimpleCase() throws IOException { // main directory Directory dir = newDirectory(); @@ -204,9 +204,9 @@ doc.add(newField("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED)); writer.updateDocument(new Term("id", "" + (i%10)), doc); } - + writer.addIndexes(aux); - + // Deletes one of the 10 added docs, leaving 9: PhraseQuery q = new PhraseQuery(); q.add(new Term("content", "bbb")); @@ -619,7 +619,7 @@ doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); } - + private abstract class RunAddIndexesThreads { Directory dir, dir2; @@ -646,8 +646,8 @@ writer2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer2.setInfoStream(VERBOSE ? 
System.out : null); writer2.commit(); - + readers = new IndexReader[NUM_COPY]; for(int i=0;i dataset = asSet(data); - + private static String MAGIC_FIELD = "f"+(NUM_FIELDS/3); - + private static FieldSelector SELECTOR = new FieldSelector() { public FieldSelectorResult accept(String f) { if (f.equals(MAGIC_FIELD)) { @@ -58,22 +58,21 @@ return FieldSelectorResult.LAZY_LOAD; } }; - - private Directory makeIndex() throws Exception { + + private Directory makeIndex() throws Exception { Directory dir = newDirectory(); try { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setUseCompoundFile(false); - for (int d = 1; d <= NUM_DOCS; d++) { Document doc = new Document(); for (int f = 1; f <= NUM_FIELDS; f++ ) { - doc.add(newField("f"+f, - data[f % data.length] - + '#' + data[random.nextInt(data.length)], - Field.Store.YES, + doc.add(newField("f"+f, + data[f % data.length] + + '#' + data[random.nextInt(data.length)], + Field.Store.YES, Field.Index.ANALYZED)); } writer.addDocument(doc); @@ -84,14 +83,14 @@ } return dir; } - + public void doTest(int[] docs) throws Exception { Directory dir = makeIndex(); IndexReader reader = IndexReader.open(dir, true); for (int i = 0; i < docs.length; i++) { Document d = reader.document(docs[i], SELECTOR); d.get(MAGIC_FIELD); - + List fields = d.getFields(); for (Iterator fi = fields.iterator(); fi.hasNext(); ) { Fieldable f=null; @@ -101,7 +100,7 @@ String fval = f.stringValue(); assertNotNull(docs[i]+" FIELD: "+fname, fval); String[] vals = fval.split("#"); - if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { + if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { fail("FIELD:"+fname+",VAL:"+fval); } } catch (Exception e) { @@ -116,7 +115,7 @@ public void testLazyWorks() throws Exception { doTest(new int[] { 399 }); } - + public void testLazyAlsoWorks() throws Exception { doTest(new int[] { 399, 150 }); } Index: lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java (working copy) @@ -32,13 +32,12 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.Version; public class TestPerSegmentDeletes extends LuceneTestCase { public void testDeletes1() throws Exception { //IndexWriter.debug2 = System.out; Directory dir = new MockDirectoryWrapper(new Random(random.nextLong()), new RAMDirectory()); - IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setMaxBufferedDocs(5000); @@ -66,41 +65,41 @@ writer.addDocument(TestIndexWriterReader.createDocument(x, "3", 2)); //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } - + writer.deleteDocuments(new Term("id", "1")); - + writer.deleteDocuments(new Term("id", "11")); - // flushing without applying deletes means + // flushing without applying deletes means // there will still be deletes in the segment infos writer.flush(false, false); assertTrue(writer.bufferedDeletesStream.any()); - + // get reader flushes 
pending deletes // so there should not be anymore IndexReader r1 = writer.getReader(); assertFalse(writer.bufferedDeletesStream.any()); r1.close(); - + // delete id:2 from the first segment // merge segments 0 and 1 // which should apply the delete id:2 writer.deleteDocuments(new Term("id", "2")); - writer.flush(false, false); + writer.flush(false, true); fsmp.doMerge = true; fsmp.start = 0; fsmp.length = 2; writer.maybeMerge(); - + assertEquals(2, writer.segmentInfos.size()); - + // id:2 shouldn't exist anymore because // it's been applied in the merge and now it's gone IndexReader r2 = writer.getReader(); int[] id2docs = toDocsArray(new Term("id", "2"), null, r2); assertTrue(id2docs == null); r2.close(); - + /** // added docs are in the ram buffer for (int x = 15; x < 20; x++) { @@ -110,43 +109,43 @@ assertTrue(writer.numRamDocs() > 0); // delete from the ram buffer writer.deleteDocuments(new Term("id", Integer.toString(13))); - + Term id3 = new Term("id", Integer.toString(3)); - + // delete from the 1st segment writer.deleteDocuments(id3); - + assertTrue(writer.numRamDocs() > 0); - + //System.out // .println("segdels1:" + writer.docWriter.deletesToString()); - + //assertTrue(writer.docWriter.segmentDeletes.size() > 0); - + // we cause a merge to happen fsmp.doMerge = true; fsmp.start = 0; fsmp.length = 2; System.out.println("maybeMerge "+writer.segmentInfos); - + SegmentInfo info0 = writer.segmentInfos.get(0); SegmentInfo info1 = writer.segmentInfos.get(1); - + writer.maybeMerge(); System.out.println("maybeMerge after "+writer.segmentInfos); // there should be docs in RAM assertTrue(writer.numRamDocs() > 0); - + // assert we've merged the 1 and 2 segments // and still have a segment leftover == 2 assertEquals(2, writer.segmentInfos.size()); assertFalse(segThere(info0, writer.segmentInfos)); assertFalse(segThere(info1, writer.segmentInfos)); - + //System.out.println("segdels2:" + writer.docWriter.deletesToString()); - + //assertTrue(writer.docWriter.segmentDeletes.size() > 0); - + IndexReader r = writer.getReader(); IndexReader r1 = r.getSequentialSubReaders()[0]; printDelDocs(r1.getDeletedDocs()); @@ -155,7 +154,7 @@ // there shouldn't be any docs for id:3 assertTrue(docs == null); r.close(); - + part2(writer, fsmp); **/ // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString()); @@ -163,7 +162,7 @@ writer.close(); dir.close(); } - + /** static boolean hasPendingDeletes(SegmentInfos infos) { for (SegmentInfo info : infos) { @@ -185,42 +184,42 @@ //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); } writer.flush(false, false); - + //System.out.println("infos3:"+writer.segmentInfos); - + Term delterm = new Term("id", "8"); writer.deleteDocuments(delterm); //System.out.println("segdels3:" + writer.docWriter.deletesToString()); - + fsmp.doMerge = true; fsmp.start = 1; fsmp.length = 2; writer.maybeMerge(); - - // deletes for info1, the newly created segment from the + + // deletes for info1, the newly created segment from the // merge should have no deletes because they were applied in // the merge //SegmentInfo info1 = writer.segmentInfos.get(1); //assertFalse(exists(info1, writer.docWriter.segmentDeletes)); - + //System.out.println("infos4:"+writer.segmentInfos); //System.out.println("segdels4:" + writer.docWriter.deletesToString()); } - + boolean segThere(SegmentInfo info, SegmentInfos infos) { for (SegmentInfo si : infos) { - if (si.name.equals(info.name)) return true; + if (si.name.equals(info.name)) return true; } return false; } - + public static 
void printDelDocs(Bits bits) { if (bits == null) return; for (int x = 0; x < bits.length(); x++) { System.out.println(x + ":" + bits.get(x)); } } - + public static int[] toDocsArray(Term term, Bits bits, IndexReader reader) throws IOException { Fields fields = MultiFields.getFields(reader); @@ -233,7 +232,7 @@ } return null; } - + public static int[] toArray(DocsEnum docsEnum) throws IOException { List docs = new ArrayList(); while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) { @@ -242,21 +241,21 @@ } return ArrayUtil.toIntArray(docs); } - + public class RangeMergePolicy extends MergePolicy { boolean doMerge = false; int start; int length; - + private final boolean useCompoundFile; - + private RangeMergePolicy(boolean useCompoundFile) { this.useCompoundFile = useCompoundFile; } - + @Override public void close() {} - + @Override public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException { @@ -273,20 +272,20 @@ } return null; } - + @Override public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) throws CorruptIndexException, IOException { return null; } - + @Override public MergeSpecification findMergesToExpungeDeletes( SegmentInfos segmentInfos) throws CorruptIndexException, IOException { return null; } - + @Override public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) { return useCompoundFile; Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java (working copy) @@ -37,7 +37,7 @@ */ public class TestIndexWriterOnJRECrash extends TestNRTThreads { private File tempDir; - + @Override public void setUp() throws Exception { super.setUp(); @@ -45,13 +45,13 @@ tempDir.delete(); tempDir.mkdir(); } - + @Override public void testNRTThreads() throws Exception { String vendor = Constants.JAVA_VENDOR; - assumeTrue(vendor + " JRE not supported.", + assumeTrue(vendor + " JRE not supported.", vendor.startsWith("Sun") || vendor.startsWith("Apple")); - + // if we are not the fork if (System.getProperty("tests.crashmode") == null) { // try up to 10 times to create an index @@ -81,11 +81,11 @@ } } } - + /** fork ourselves in a new jvm. sets -Dtests.crashmode=true */ public void forkTest() throws Exception { List cmd = new ArrayList(); - cmd.add(System.getProperty("java.home") + cmd.add(System.getProperty("java.home") + System.getProperty("file.separator") + "bin" + System.getProperty("file.separator") @@ -116,7 +116,7 @@ if (VERBOSE) System.err.println("<<< End subprocess output"); p.waitFor(); } - + /** * Recursively looks for indexes underneath file, * and runs checkindex on them. returns true if it found any indexes. @@ -139,7 +139,7 @@ } return false; } - + /** * currently, this only works/tested on Sun and IBM. 
*/ Index: lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (working copy) @@ -40,7 +40,7 @@ */ public class TestDeletionPolicy extends LuceneTestCase { - + private void verifyCommitOrder(List commits) throws IOException { final IndexCommit firstCommit = commits.get(0); long last = SegmentInfos.generationFromSegmentsFileName(firstCommit.getSegmentsFileName()); @@ -135,7 +135,7 @@ verifyCommitOrder(commits); doDeletes(commits, true); } - + private void doDeletes(List commits, boolean isCommit) { // Assert that we really are only called for each new @@ -248,7 +248,7 @@ // seconds of the last one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.getCurrentSegmentGeneration(dir); - + String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); @@ -276,7 +276,7 @@ // OK break; } - + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } @@ -449,7 +449,7 @@ // Now 8 because we made another commit assertEquals(7, IndexReader.listCommits(dir).size()); - + r = IndexReader.open(dir, true); // Not optimized because we rolled it back, and now only // 10 docs @@ -471,7 +471,7 @@ // but this time keeping only the last commit: writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexCommit(lastCommit)); assertEquals(10, writer.numDocs()); - + // Reader still sees optimized index, because writer // opened on the prior commit has not yet committed: r = IndexReader.open(dir, true); @@ -626,7 +626,7 @@ } IndexWriter writer = new IndexWriter(dir, conf); writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for(int i=0;i uniqueValues = new HashSet(); + for (int j = 0; j < ids.length; j++) { + Integer i = ids[j]; + // create an array here since we compare identity below against tailItem + Term[] term = new Term[] {template.createTerm(i.toString())}; + uniqueValues.add(term[0]); + queue.addDelete(term); + if (random.nextInt(20) == 0 || j == ids.length - 1) { + queue.updateSlice(slice1); + assertTrue(slice1.isTailItem(term)); + slice1.apply(bd1, j); + assertAllBetween(last1, j, bd1, ids); + last1 = j + 1; + } + if (random.nextInt(10) == 5 || j == ids.length - 1) { + queue.updateSlice(slice2); + assertTrue(slice2.isTailItem(term)); + slice2.apply(bd2, j); + assertAllBetween(last2, j, bd2, ids); + last2 = j + 1; + } + assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes()); + } + assertEquals(uniqueValues, bd1.terms.keySet()); + assertEquals(uniqueValues, bd2.terms.keySet()); + assertEquals(uniqueValues, new HashSet(Arrays.asList(queue + .freezeGlobalBuffer(null).terms))); + assertEquals("num deletes must be 0 after freeze", 0, queue + .numGlobalTermDeletes()); + } + + private void assertAllBetween(int start, int end, BufferedDeletes deletes, + Integer[] ids) { + Term template = new Term("id"); + for (int i = start; i <= end; i++) { + assertEquals(Integer.valueOf(end), deletes.terms.get(template + .createTerm(ids[i].toString()))); + } + } + + public void testClear() { + DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue(); + Term template = new Term("id"); + assertFalse(queue.anyChanges()); + queue.clear(); + 
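// clearing an already-empty queue must still report no buffered changes +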
assertFalse(queue.anyChanges()); + final int size = 200 + random.nextInt(500) * RANDOM_MULTIPLIER; + int termsSinceFreeze = 0; + int queriesSinceFreeze = 0; + for (int i = 0; i < size; i++) { + Term term = template.createTerm("" + i); + if (random.nextInt(10) == 0) { + queue.addDelete(new TermQuery(term)); + queriesSinceFreeze++; + } else { + queue.addDelete(term); + termsSinceFreeze++; + } + assertTrue(queue.anyChanges()); + if (random.nextInt(10) == 0) { + queue.clear(); + queue.tryApplyGlobalSlice(); + assertFalse(queue.anyChanges()); + } + } + + } + + public void testAnyChanges() { + DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue(); + Term template = new Term("id"); + final int size = 200 + random.nextInt(500) * RANDOM_MULTIPLIER; + int termsSinceFreeze = 0; + int queriesSinceFreeze = 0; + for (int i = 0; i < size; i++) { + Term term = template.createTerm("" + i); + if (random.nextInt(10) == 0) { + queue.addDelete(new TermQuery(term)); + queriesSinceFreeze++; + } else { + queue.addDelete(term); + termsSinceFreeze++; + } + assertTrue(queue.anyChanges()); + if (random.nextInt(5) == 0) { + FrozenBufferedDeletes freezeGlobalBuffer = queue + .freezeGlobalBuffer(null); + assertEquals(termsSinceFreeze, freezeGlobalBuffer.terms.length); + assertEquals(queriesSinceFreeze, freezeGlobalBuffer.queries.length); + queriesSinceFreeze = 0; + termsSinceFreeze = 0; + assertFalse(queue.anyChanges()); + } + } + } + + public void testStressDeleteQueue() throws InterruptedException { + DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue(); + Set uniqueValues = new HashSet(); + final int size = 10000 + random.nextInt(500) * RANDOM_MULTIPLIER; + Integer[] ids = new Integer[size]; + Term template = new Term("id"); + for (int i = 0; i < ids.length; i++) { + ids[i] = random.nextInt(); + uniqueValues.add(template.createTerm(ids[i].toString())); + } + CountDownLatch latch = new CountDownLatch(1); + AtomicInteger index = new AtomicInteger(0); + final int numThreads = 2 + random.nextInt(5); + UpdateThread[] threads = new UpdateThread[numThreads]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new UpdateThread(queue, index, ids, latch); + threads[i].start(); + } + latch.countDown(); + for (int i = 0; i < threads.length; i++) { + threads[i].join(); + } + + for (UpdateThread updateThread : threads) { + DeleteSlice slice = updateThread.slice; + queue.updateSlice(slice); + BufferedDeletes deletes = updateThread.deletes; + slice.apply(deletes, BufferedDeletes.MAX_INT); + assertEquals(uniqueValues, deletes.terms.keySet()); + } + queue.tryApplyGlobalSlice(); + assertEquals(uniqueValues, new HashSet(Arrays.asList(queue + .freezeGlobalBuffer(null).terms))); + assertEquals("num deletes must be 0 after freeze", 0, queue + .numGlobalTermDeletes()); + } + + private static class UpdateThread extends Thread { + final DocumentsWriterDeleteQueue queue; + final AtomicInteger index; + final Integer[] ids; + final DeleteSlice slice; + final BufferedDeletes deletes; + final CountDownLatch latch; + + protected UpdateThread(DocumentsWriterDeleteQueue queue, + AtomicInteger index, Integer[] ids, CountDownLatch latch) { + this.queue = queue; + this.index = index; + this.ids = ids; + this.slice = queue.newSlice(); + deletes = new BufferedDeletes(false); + this.latch = latch; + } + + @Override + public void run() { + try { + latch.await(); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } + Term template = new Term("id"); + int i = 0; + while ((i = 
index.getAndIncrement()) < ids.length) { + Term term = template.createTerm(ids[i].toString()); + queue.add(term, slice); + assertTrue(slice.isTailItem(term)); + slice.apply(deletes, BufferedDeletes.MAX_INT); + } + } + } + +} Property changes on: lucene\src\test\org\apache\lucene\index\TestDocumentsWriterDeleteQueue.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java (revision 0) +++ lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java (revision 0) @@ -0,0 +1,432 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.util.LineFileDocs; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ThrottledIndexOutput; +import org.junit.Before; + +public class TestFlushByRamOrCountsPolicy extends LuceneTestCase { + + private LineFileDocs lineDocFile; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + lineDocFile = new LineFileDocs(random); + } + + public void testFlushByRam() throws CorruptIndexException, + LockObtainFailedException, IOException, InterruptedException { + int[] numThreads = new int[] { 3 + random.nextInt(12), 1 }; + for (int i = 0; i < numThreads.length; i++) { + runFlushByRam(numThreads[i], + 1 + random.nextInt(10) + random.nextDouble(), false); + } + + for (int i = 0; i < numThreads.length; i++) { + // with a 512 mb ram buffer we should never stall + runFlushByRam(numThreads[i], 512.d, true); + } + } + + protected void runFlushByRam(int numThreads, double maxRam, + boolean ensureNotStalled) throws IOException, CorruptIndexException, + LockObtainFailedException, InterruptedException { + final int numDocumentsToIndex = 50 + random.nextInt(150); + AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); + Directory dir = newDirectory(); + MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setFlushPolicy(flushPolicy); + + final int numDWPT = 1 + 
random.nextInt(8); + DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool( + numDWPT); + iwc.setIndexerThreadPool(threadPool); + iwc.setRAMBufferSizeMB(1 + random.nextInt(10) + random.nextDouble()); + iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + IndexWriter writer = new IndexWriter(dir, iwc); + assertFalse(flushPolicy.flushOnDocCount()); + assertFalse(flushPolicy.flushOnDeleteTerms()); + assertTrue(flushPolicy.flushOnRAM()); + DocumentsWriter docsWriter = writer.getDocsWriter(); + assertNotNull(docsWriter); + DocumentsWriterFlushControl flushControl = docsWriter.flushControl; + assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); + + IndexThread[] threads = new IndexThread[numThreads]; + for (int x = 0; x < threads.length; x++) { + threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile, + false); + threads[x].start(); + } + + for (int x = 0; x < threads.length; x++) { + threads[x].join(); + } + final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.); + assertEquals(" all flushes must be due numThreads=" + numThreads, 0, + flushControl.flushBytes()); + assertEquals(numDocumentsToIndex, writer.numDocs()); + assertEquals(numDocumentsToIndex, writer.maxDoc()); + assertTrue("peak bytes without flush exceeded watermark", + flushPolicy.peakBytesWithoutFlush <= maxRAMBytes); + assertActiveBytesAfter(flushControl); + if (flushPolicy.hasMarkedPending) { + assertTrue(maxRAMBytes < flushControl.peakActiveBytes); + } + if (ensureNotStalled) { + assertFalse(docsWriter.healthiness.wasStalled); + } + writer.close(); + assertEquals(0, flushControl.activeBytes()); + dir.close(); + } + + public void testFlushDocCount() throws CorruptIndexException, + LockObtainFailedException, IOException, InterruptedException { + int[] numThreads = new int[] { 3 + random.nextInt(12), 1 }; + for (int i = 0; i < numThreads.length; i++) { + + final int numDocumentsToIndex = 50 + random.nextInt(150); + AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); + Directory dir = newDirectory(); + MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setFlushPolicy(flushPolicy); + + final int numDWPT = 1 + random.nextInt(8); + DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool( + numDWPT); + iwc.setIndexerThreadPool(threadPool); + iwc.setMaxBufferedDocs(2 + random.nextInt(50)); + iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + IndexWriter writer = new IndexWriter(dir, iwc); + assertTrue(flushPolicy.flushOnDocCount()); + assertFalse(flushPolicy.flushOnDeleteTerms()); + assertFalse(flushPolicy.flushOnRAM()); + DocumentsWriter docsWriter = writer.getDocsWriter(); + assertNotNull(docsWriter); + DocumentsWriterFlushControl flushControl = docsWriter.flushControl; + assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); + + IndexThread[] threads = new IndexThread[numThreads[i]]; + for (int x = 0; x < threads.length; x++) { + threads[x] = new IndexThread(numDocs, numThreads[i], writer, + lineDocFile, false); + threads[x].start(); + } + + for (int x = 0; x < threads.length; x++) { + threads[x].join(); + } + + assertEquals(" all flushes must be due numThreads=" + numThreads[i], 0, + flushControl.flushBytes()); + 
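// no documents may be lost with the doc-count trigger: numDocs and maxDoc must both match what the threads indexed +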
assertEquals(numDocumentsToIndex, writer.numDocs()); + assertEquals(numDocumentsToIndex, writer.maxDoc()); + assertTrue("peak bytes without flush exceeded watermark", + flushPolicy.peakDocCountWithoutFlush <= iwc.getMaxBufferedDocs()); + assertActiveBytesAfter(flushControl); + writer.close(); + assertEquals(0, flushControl.activeBytes()); + dir.close(); + } + } + + public void testRandom() throws IOException, InterruptedException { + final int numThreads = 1 + random.nextInt(8); + final int numDocumentsToIndex = 100 + random.nextInt(300); + AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)); + MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy(); + iwc.setFlushPolicy(flushPolicy); + + final int numDWPT = 1 + random.nextInt(8); + DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool( + numDWPT); + iwc.setIndexerThreadPool(threadPool); + + IndexWriter writer = new IndexWriter(dir, iwc); + DocumentsWriter docsWriter = writer.getDocsWriter(); + assertNotNull(docsWriter); + DocumentsWriterFlushControl flushControl = docsWriter.flushControl; + + assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes()); + + IndexThread[] threads = new IndexThread[numThreads]; + for (int x = 0; x < threads.length; x++) { + threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile, + true); + threads[x].start(); + } + + for (int x = 0; x < threads.length; x++) { + threads[x].join(); + } + assertEquals(" all flushes must be due", 0, flushControl.flushBytes()); + assertEquals(numDocumentsToIndex, writer.numDocs()); + assertEquals(numDocumentsToIndex, writer.maxDoc()); + if (flushPolicy.flushOnRAM() && !flushPolicy.flushOnDocCount() + && !flushPolicy.flushOnDeleteTerms()) { + final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. 
* 1024.); + assertTrue("peak bytes without flush exceeded watermark", + flushPolicy.peakBytesWithoutFlush <= maxRAMBytes); + if (flushPolicy.hasMarkedPending) { + assertTrue("max: " + maxRAMBytes + " " + flushControl.peakActiveBytes, + maxRAMBytes <= flushControl.peakActiveBytes); + } + } + assertActiveBytesAfter(flushControl); + writer.commit(); + assertEquals(0, flushControl.activeBytes()); + IndexReader r = IndexReader.open(dir); + assertEquals(numDocumentsToIndex, r.numDocs()); + assertEquals(numDocumentsToIndex, r.maxDoc()); + if (!flushPolicy.flushOnRAM()) { + assertFalse("never stall if we don't flush on RAM", docsWriter.healthiness.wasStalled); + assertFalse("never block if we don't flush on RAM", docsWriter.healthiness.hasBlocked()); + } + r.close(); + writer.close(); + dir.close(); + } + + public void testHealthyness() throws InterruptedException, + CorruptIndexException, LockObtainFailedException, IOException { + + int[] numThreads = new int[] { 4 + random.nextInt(8), 1 }; + final int numDocumentsToIndex = 50 + random.nextInt(50); + for (int i = 0; i < numThreads.length; i++) { + AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex); + MockDirectoryWrapper dir = newDirectory(); + // mock a very slow harddisk here so that flushing is very slow + dir.setThrottledIndexOutput(new ThrottledIndexOutput(ThrottledIndexOutput + .mBitsToBytes(40 + random.nextInt(10)), 5 + random.nextInt(5), null)); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)); + iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy(); + iwc.setFlushPolicy(flushPolicy); + + DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool( + numThreads[i]== 1 ? 
1 : 2); + iwc.setIndexerThreadPool(threadPool); + // with such a small ram buffer we should be stalled quite quickly + iwc.setRAMBufferSizeMB(0.25); + IndexWriter writer = new IndexWriter(dir, iwc); + IndexThread[] threads = new IndexThread[numThreads[i]]; + for (int x = 0; x < threads.length; x++) { + threads[x] = new IndexThread(numDocs, numThreads[i], writer, + lineDocFile, false); + threads[x].start(); + } + + for (int x = 0; x < threads.length; x++) { + threads[x].join(); + } + DocumentsWriter docsWriter = writer.getDocsWriter(); + assertNotNull(docsWriter); + DocumentsWriterFlushControl flushControl = docsWriter.flushControl; + assertEquals(" all flushes must be due", 0, flushControl.flushBytes()); + assertEquals(numDocumentsToIndex, writer.numDocs()); + assertEquals(numDocumentsToIndex, writer.maxDoc()); + if (numThreads[i] == 1) { + assertFalse( + "single thread must not stall", + docsWriter.healthiness.wasStalled); + assertFalse( + "single thread must not block numThreads: " + numThreads[i], + docsWriter.healthiness.hasBlocked()); + // this assumption is too strict in this test +// } else { +// if (docsWriter.healthiness.wasStalled) { +// // TODO maybe this assumption is too strict +// assertTrue(" we should have blocked here numThreads: " +// + numThreads[i], docsWriter.healthiness.hasBlocked()); +// } + } + assertActiveBytesAfter(flushControl); + writer.close(true); + dir.close(); + } + } + + protected void assertActiveBytesAfter(DocumentsWriterFlushControl flushControl) { + Iterator allActiveThreads = flushControl.allActiveThreads(); + long bytesUsed = 0; + while (allActiveThreads.hasNext()) { + bytesUsed += allActiveThreads.next().perThread.bytesUsed(); + } + assertEquals(bytesUsed, flushControl.activeBytes()); + } + + public class IndexThread extends Thread { + IndexWriter writer; + IndexWriterConfig iwc; + LineFileDocs docs; + private AtomicInteger pendingDocs; + private final boolean doRandomCommit; + + public IndexThread(AtomicInteger pendingDocs, int numThreads, + IndexWriter writer, LineFileDocs docs, boolean doRandomCommit) { + this.pendingDocs = pendingDocs; + this.writer = writer; + iwc = writer.getConfig(); + this.docs = docs; + this.doRandomCommit = doRandomCommit; + } + + public void run() { + try { + long ramSize = 0; + while (pendingDocs.decrementAndGet() > -1) { + Document doc = docs.nextDoc(); + writer.addDocument(doc); + long newRamSize = writer.ramSizeInBytes(); + if (newRamSize != ramSize) { + ramSize = newRamSize; + } + if (doRandomCommit) { + int commit; + synchronized (random) { + commit = random.nextInt(20); + } + if (commit == 0) { + writer.commit(); + } + } + } + writer.commit(); + } catch (Throwable ex) { + throw new RuntimeException(ex); + } + } + } + + private static class MockDefaultFlushPolicy extends FlushByRamOrCountsPolicy { + long peakBytesWithoutFlush = Integer.MIN_VALUE; + long peakDocCountWithoutFlush = Integer.MIN_VALUE; + boolean hasMarkedPending = false; + + @Override + public void onDelete(DocumentsWriterFlushControl control, ThreadState state) { + final ArrayList pending = new ArrayList(); + final ArrayList notPending = new ArrayList(); + findPending(control, pending, notPending); + final boolean flushCurrent = state.flushPending; + final ThreadState toFlush; + if (state.flushPending) { + toFlush = state; + } else if (flushOnDeleteTerms() + && state.perThread.pendingDeletes.numTermDeletes.get() >= indexWriterConfig + .getMaxBufferedDeleteTerms()) { + toFlush = state; + } else { + toFlush = null; + } + super.onDelete(control, state); +
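// the real policy ran above; now verify it marked the thread state we predicted (if any) as flush pending +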
if (toFlush != null) { + if (flushCurrent) { + assertTrue(pending.remove(toFlush)); + } else { + assertTrue(notPending.remove(toFlush)); + } + assertTrue(toFlush.flushPending); + hasMarkedPending = true; + } + + for (ThreadState threadState : notPending) { + assertFalse(threadState.flushPending); + } + } + + @Override + public void onInsert(DocumentsWriterFlushControl control, ThreadState state) { + final ArrayList pending = new ArrayList(); + final ArrayList notPending = new ArrayList(); + findPending(control, pending, notPending); + final boolean flushCurrent = state.flushPending; + long activeBytes = control.activeBytes(); + final ThreadState toFlush; + if (state.flushPending) { + toFlush = state; + } else if (flushOnDocCount() + && state.perThread.getNumDocsInRAM() >= indexWriterConfig + .getMaxBufferedDocs()) { + toFlush = state; + } else if (flushOnRAM() + && activeBytes >= (long) (indexWriterConfig.getRAMBufferSizeMB() * 1024. * 1024.)) { + toFlush = findLargestNonPendingWriter(control, state); + assertFalse(toFlush.flushPending); + } else { + toFlush = null; + } + super.onInsert(control, state); + if (toFlush != null) { + if (flushCurrent) { + assertTrue(pending.remove(toFlush)); + } else { + assertTrue(notPending.remove(toFlush)); + } + assertTrue(toFlush.flushPending); + hasMarkedPending = true; + } else { + peakBytesWithoutFlush = Math.max(activeBytes, peakBytesWithoutFlush); + peakDocCountWithoutFlush = Math.max(state.perThread.getNumDocsInRAM(), + peakDocCountWithoutFlush); + } + + for (ThreadState threadState : notPending) { + assertFalse(threadState.flushPending); + } + } + } + + static void findPending(DocumentsWriterFlushControl flushControl, + ArrayList pending, ArrayList notPending) { + Iterator allActiveThreads = flushControl.allActiveThreads(); + while (allActiveThreads.hasNext()) { + ThreadState next = allActiveThreads.next(); + if (next.flushPending) { + pending.add(next); + } else { + notPending.add(next); + } + } + } +} Property changes on: lucene\src\test\org\apache\lucene\index\TestFlushByRamOrCountsPolicy.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (working copy) @@ -44,7 +44,7 @@ public class TestLazyProxSkipping extends LuceneTestCase { private IndexSearcher searcher; private int seeksCounter = 0; - + private String field = "tokens"; private String term1 = "xx"; private String term2 = "yy"; @@ -64,12 +64,12 @@ } return ii; } - + } - + private void createIndex(int numHits) throws IOException { int numDocs = 500; - + final Analyzer analyzer = new Analyzer() { @Override public TokenStream tokenStream(String fieldName, Reader reader) { @@ -101,7 +101,7 @@ doc.add(newField(this.field, content, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } - + // make sure the index has only a single segment writer.optimize(); writer.close(); @@ -110,27 +110,27 @@ this.searcher = newSearcher(reader); } - + private ScoreDoc[] search() throws IOException { // create PhraseQuery "term1 term2" and search PhraseQuery pq = new PhraseQuery(); pq.add(new Term(this.field, this.term1)); pq.add(new Term(this.field, this.term2)); - return this.searcher.search(pq, null, 1000).scoreDocs; + return this.searcher.search(pq, 
null, 1000).scoreDocs; } - + private void performTest(int numHits) throws IOException { createIndex(numHits); this.seeksCounter = 0; ScoreDoc[] hits = search(); // verify that the right number of docs was found assertEquals(numHits, hits.length); - + // check if the number of calls of seek() does not exceed the number of hits assertTrue(this.seeksCounter > 0); assertTrue("seeksCounter=" + this.seeksCounter + " numHits=" + numHits, this.seeksCounter <= numHits + 1); } - + public void testLazySkipping() throws IOException { assumeFalse("This test cannot run with SimpleText codec", CodecProvider.getDefault().getFieldCodec(this.field).equals("SimpleText")); // test whether only the minimum amount of seeks() @@ -140,7 +140,7 @@ performTest(10); searcher.close(); } - + public void testSeek() throws IOException { Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); @@ -149,7 +149,7 @@ doc.add(newField(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } - + writer.close(); IndexReader reader = IndexReader.open(directory, true); @@ -176,55 +176,55 @@ } reader.close(); directory.close(); - + } - + // Simply extends IndexInput in a way that we are able to count the number // of invocations of seek() class SeeksCountingStream extends IndexInput { - private IndexInput input; - - + private IndexInput input; + + SeeksCountingStream(IndexInput input) { this.input = input; - } - + } + @Override public byte readByte() throws IOException { return this.input.readByte(); } - + @Override public void readBytes(byte[] b, int offset, int len) throws IOException { - this.input.readBytes(b, offset, len); + this.input.readBytes(b, offset, len); } - + @Override public void close() throws IOException { this.input.close(); } - + @Override public long getFilePointer() { return this.input.getFilePointer(); } - + @Override public void seek(long pos) throws IOException { TestLazyProxSkipping.this.seeksCounter++; this.input.seek(pos); } - + @Override public long length() { return this.input.length(); } - + @Override public Object clone() { return new SeeksCountingStream((IndexInput) this.input.clone()); } - + } } Index: lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java =================================================================== --- lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java (revision 1097441) +++ lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java (working copy) @@ -38,12 +38,12 @@ Set fileExtensions = new HashSet(); fileExtensions.add(IndexFileNames.FIELDS_EXTENSION); fileExtensions.add(IndexFileNames.FIELDS_INDEX_EXTENSION); - + MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); primaryDir.setCheckIndexOnClose(false); // only part of an index MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); secondaryDir.setCheckIndexOnClose(false); // only part of an index - + FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true); IndexWriter writer = new IndexWriter( fsd, Index: lucene/src/java/org/apache/lucene/index/DocInverterPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (working copy) @@ -35,20 +35,20 @@ final class 
DocInverterPerField extends DocFieldConsumerPerField { - final private DocInverterPerThread perThread; - final private FieldInfo fieldInfo; + final private DocInverter parent; + final FieldInfo fieldInfo; final InvertedDocConsumerPerField consumer; final InvertedDocEndConsumerPerField endConsumer; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; - public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) { - this.perThread = perThread; + public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) { + this.parent = parent; this.fieldInfo = fieldInfo; - docState = perThread.docState; - fieldState = perThread.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); - this.endConsumer = perThread.endConsumer.addField(this, fieldInfo); + docState = parent.docState; + fieldState = parent.fieldState; + this.consumer = parent.consumer.addField(this, fieldInfo); + this.endConsumer = parent.endConsumer.addField(this, fieldInfo); } @Override @@ -80,8 +80,8 @@ if (!field.isTokenized()) { // un-tokenized field String stringValue = field.stringValue(); final int valueLength = stringValue.length(); - perThread.singleToken.reinit(stringValue, 0, valueLength); - fieldState.attributeSource = perThread.singleToken; + parent.singleToken.reinit(stringValue, 0, valueLength); + fieldState.attributeSource = parent.singleToken; consumer.start(field); boolean success = false; @@ -89,8 +89,9 @@ consumer.add(); success = true; } finally { - if (!success) + if (!success) { docState.docWriter.setAborting(); + } } fieldState.offset += valueLength; fieldState.length++; @@ -114,8 +115,8 @@ if (stringValue == null) { throw new IllegalArgumentException("field must have either TokenStream, String or Reader value"); } - perThread.stringReader.init(stringValue); - reader = perThread.stringReader; + parent.stringReader.init(stringValue); + reader = parent.stringReader; } // Tokenize field and add to postingTable @@ -166,8 +167,9 @@ consumer.add(); success = true; } finally { - if (!success) + if (!success) { docState.docWriter.setAborting(); + } } fieldState.length++; fieldState.position++; @@ -195,4 +197,9 @@ consumer.finish(); endConsumer.finish(); } + + @Override + FieldInfo getFieldInfo() { + return fieldInfo; + } } Index: lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (working copy) @@ -27,9 +27,8 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implements Comparable { - final NormsWriterPerThread perThread; final FieldInfo fieldInfo; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final Similarity similarity; // Holds all docID/norm pairs we've seen @@ -46,10 +45,9 @@ upto = 0; } - public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) { - this.perThread = perThread; + public NormsWriterPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; - docState = perThread.docState; + docState = docInverterPerField.docState; fieldState = docInverterPerField.fieldState; similarity = docState.similarityProvider.get(fieldInfo.name); } Index: 
lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -abstract class TermsHashConsumerPerThread { - abstract void startDocument() throws IOException; - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); - abstract public void abort(); -} Index: lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 0) @@ -0,0 +1,496 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; + +import java.io.IOException; +import java.io.PrintStream; +import java.text.NumberFormat; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; +import org.apache.lucene.search.SimilarityProvider; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BitVector; +import org.apache.lucene.util.ByteBlockPool.Allocator; +import org.apache.lucene.util.RamUsageEstimator; + +public class DocumentsWriterPerThread { + + /** + * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method + * which returns the DocConsumer that the DocumentsWriter calls to process the + * documents. + */ + abstract static class IndexingChain { + abstract DocConsumer getChain(DocumentsWriterPerThread documentsWriterPerThread); + } + + + static final IndexingChain defaultIndexingChain = new IndexingChain() { + + @Override + DocConsumer getChain(DocumentsWriterPerThread documentsWriterPerThread) { + /* + This is the current indexing chain: + + DocConsumer / DocConsumerPerThread + --> code: DocFieldProcessor / DocFieldProcessorPerThread + --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField + --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField + --> code: DocInverter / DocInverterPerThread / DocInverterPerField + --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField + --> code: TermsHash / TermsHashPerThread / TermsHashPerField + --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField + --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField + --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField + --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField + --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField + --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField + */ + + // Build up indexing chain: + + final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriterPerThread); + final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); + + final InvertedDocConsumer termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true, + new TermsHash(documentsWriterPerThread, termVectorsWriter, false, null)); + final NormsWriter normsWriter = new NormsWriter(); + final DocInverter docInverter = new DocInverter(documentsWriterPerThread.docState, termsHash, normsWriter); + return new DocFieldProcessor(documentsWriterPerThread, docInverter); + } + }; + + static class DocState { + final DocumentsWriterPerThread docWriter; + Analyzer analyzer; + PrintStream infoStream; + SimilarityProvider similarityProvider; + int docID; + Document doc; + String maxTermPrefix; + + DocState(DocumentsWriterPerThread docWriter) { + this.docWriter = docWriter; + } + + // Only called by asserts + public boolean testPoint(String name) { + return docWriter.writer.testPoint(name); + } + + public void clear() { + // don't hold onto doc nor analyzer, in case it is + // largish: + doc = null; + analyzer = null; + } + } + + static class FlushedSegment { + final SegmentInfo segmentInfo; + final BufferedDeletes 
segmentDeletes; + final BitVector deletedDocuments; + + private FlushedSegment(SegmentInfo segmentInfo, + BufferedDeletes segmentDeletes, BitVector deletedDocuments) { + this.segmentInfo = segmentInfo; + this.segmentDeletes = segmentDeletes; + this.deletedDocuments = deletedDocuments; + } + } + + /** Called if we hit an exception at a bad time (when + * updating the index files) and must discard all + * currently buffered docs. This resets our state, + * discarding any docs added since last flush. */ + void abort() throws IOException { + hasAborted = aborting = true; + try { + if (infoStream != null) { + message("docWriter: now abort"); + } + try { + consumer.abort(); + } catch (Throwable t) { + } + + pendingDeletes.clear(); + deleteSlice = deleteQueue.newSlice(); + // Reset all postings data + doAfterFlush(); + + } finally { + aborting = false; + if (infoStream != null) { + message("docWriter: done abort"); + } + } + } + + final DocumentsWriter parent; + final IndexWriter writer; + final Directory directory; + final DocState docState; + final DocConsumer consumer; + final AtomicLong bytesUsed; + + SegmentWriteState flushState; + //Deletes for our still-in-RAM (to be flushed next) segment + BufferedDeletes pendingDeletes; + String segment; // Current segment we are working on + boolean aborting = false; // True if an abort is pending + boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting + + private FieldInfos fieldInfos; + private final PrintStream infoStream; + private int numDocsInRAM; + private int flushedDocCount; + DocumentsWriterDeleteQueue deleteQueue; + DeleteSlice deleteSlice; + private final NumberFormat nf = NumberFormat.getInstance(); + + + public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent, + FieldInfos fieldInfos, IndexingChain indexingChain) { + this.directory = directory; + this.parent = parent; + this.fieldInfos = fieldInfos; + this.writer = parent.indexWriter; + this.infoStream = parent.indexWriter.getInfoStream(); + this.docState = new DocState(this); + this.docState.similarityProvider = parent.indexWriter.getConfig() + .getSimilarityProvider(); + + consumer = indexingChain.getChain(this); + bytesUsed = new AtomicLong(0); + pendingDeletes = new BufferedDeletes(false); + initialize(); + } + + public DocumentsWriterPerThread(DocumentsWriterPerThread other, FieldInfos fieldInfos) { + this(other.directory, other.parent, fieldInfos, other.parent.chain); + } + + void initialize() { + deleteQueue = parent.deleteQueue; + assert numDocsInRAM == 0 : "num docs " + numDocsInRAM; + pendingDeletes.clear(); + deleteSlice = null; + } + + void setAborting() { + aborting = true; + } + + boolean checkAndResetHasAborted() { + final boolean retval = hasAborted; + hasAborted = false; + return retval; + } + + public void updateDocument(Document doc, Analyzer analyzer, Term delTerm) throws IOException { + assert writer.testPoint("DocumentsWriterPerThread addDocument start"); + assert deleteQueue != null; + docState.doc = doc; + docState.analyzer = analyzer; + docState.docID = numDocsInRAM; + if (segment == null) { + // this call is synchronized on IndexWriter.segmentInfos + segment = writer.newSegmentName(); + assert numDocsInRAM == 0; + } + + boolean success = false; + try { + try { + consumer.processDocument(fieldInfos); + } finally { + docState.clear(); + } + success = true; + } finally { + if (!success) { + if (!aborting) { + // mark document as deleted + deleteDocID(docState.docID); + numDocsInRAM++; + } else { + 
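// the exception was aborting, so discard the entire in-RAM segment instead of just marking this doc deleted +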
abort(); + } + } + } + success = false; + try { + consumer.finishDocument(); + success = true; + } finally { + if (!success) { + abort(); + } + } + finishDocument(delTerm); + } + + private void finishDocument(Term delTerm) throws IOException { + /* + * here we actually finish the document in two steps 1. push the delete into + * the queue and update our slice. 2. increment the DWPT private document + * id. + * + * the updated slice we get from 1. holds all the deletes that have occurred + * since we updated the slice the last time. + */ + if (deleteSlice == null) { + deleteSlice = deleteQueue.newSlice(); + if (delTerm != null) { + deleteQueue.add(delTerm, deleteSlice); + deleteSlice.reset(); + } + + } else { + if (delTerm != null) { + deleteQueue.add(delTerm, deleteSlice); + assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item"; + deleteSlice.apply(pendingDeletes, numDocsInRAM); + } else if (deleteQueue.updateSlice(deleteSlice)) { + deleteSlice.apply(pendingDeletes, numDocsInRAM); + } + } + ++numDocsInRAM; + } + + // Buffer a specific docID for deletion. Currently only + // used when we hit a exception when adding a document + void deleteDocID(int docIDUpto) { + pendingDeletes.addDocID(docIDUpto); + // NOTE: we do not trigger flush here. This is + // potentially a RAM leak, if you have an app that tries + // to add docs but every single doc always hits a + // non-aborting exception. Allowing a flush here gets + // very messy because we are only invoked when handling + // exceptions so to do this properly, while handling an + // exception we'd have to go off and flush new deletes + // which is risky (likely would hit some other + // confounding exception). + } + + /** + * Returns the number of delete terms in this {@link DocumentsWriterPerThread} + */ + public int numDeleteTerms() { + // public for FlushPolicy + return pendingDeletes.numTermDeletes.get(); + } + + /** + * Returns the number of RAM resident documents in this {@link DocumentsWriterPerThread} + */ + public int getNumDocsInRAM() { + // public for FlushPolicy + return numDocsInRAM; + } + + SegmentCodecs getCodec() { + return flushState.segmentCodecs; + } + + /** Reset after a flush */ + private void doAfterFlush() throws IOException { + segment = null; + consumer.doAfterFlush(); + fieldInfos = new FieldInfos(fieldInfos); + parent.subtractFlushedNumDocs(numDocsInRAM); + numDocsInRAM = 0; + } + + /** + * Prepares this DWPT for flushing. This method will freeze and return the + * {@link DocumentsWriterDeleteQueue}s global buffer and apply all pending + * deletes to this DWPT. + */ + FrozenBufferedDeletes prepareFlush() { + assert numDocsInRAM > 0; + final FrozenBufferedDeletes globalDeletes = deleteQueue.freezeGlobalBuffer(deleteSlice); + /* deleteSlice can possibly be null if we have hit non-aborting exceptions during indexing and never succeeded + adding a document. 
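In that case there is simply no thread-private slice to apply before the flush.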
*/ + if (deleteSlice != null) { + // apply all deletes before we flush and release the delete slice + deleteSlice.apply(pendingDeletes, numDocsInRAM); + assert deleteSlice.isEmpty(); + deleteSlice = null; + } + return globalDeletes; + } + + /** Flush all pending docs to a new segment */ + FlushedSegment flush() throws IOException { + assert numDocsInRAM > 0; + assert deleteSlice == null : "all deletes must be applied in prepareFlush"; + flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos, + numDocsInRAM, writer.getConfig().getTermIndexInterval(), + fieldInfos.buildSegmentCodecs(true), pendingDeletes); + final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.; + // Apply delete-by-docID now (delete-byDocID only + // happens when an exception is hit processing that + // doc, eg if analyzer has some problem w/ the text): + if (pendingDeletes.docIDs.size() > 0) { + flushState.deletedDocs = new BitVector(numDocsInRAM); + for(int delDocID : pendingDeletes.docIDs) { + flushState.deletedDocs.set(delDocID); + } + pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID); + pendingDeletes.docIDs.clear(); + } + + if (infoStream != null) { + message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM); + } + + if (aborting) { + if (infoStream != null) { + message("flush: skip because aborting is set"); + } + return null; + } + + boolean success = false; + + try { + + SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos); + consumer.flush(flushState); + pendingDeletes.terms.clear(); + newSegment.setHasVectors(flushState.hasVectors); + + if (infoStream != null) { + message("new segment has " + (flushState.deletedDocs == null ? 0 : flushState.deletedDocs.count()) + " deleted docs"); + message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors")); + message("flushedFiles=" + newSegment.files()); + message("flushed codecs=" + newSegment.getSegmentCodecs()); + } + flushedDocCount += flushState.numDocs; + + final BufferedDeletes segmentDeletes; + if (pendingDeletes.queries.isEmpty()) { + pendingDeletes.clear(); + segmentDeletes = null; + } else { + segmentDeletes = pendingDeletes; + pendingDeletes = new BufferedDeletes(false); + } + + if (infoStream != null) { + final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.; + final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.; + message("flushed: segment=" + newSegment + + " ramUsed=" + nf.format(startMBUsed) + " MB" + + " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + + " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" + + " docs/MB=" + nf.format(flushedDocCount / newSegmentSize) + + " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%"); + } + doAfterFlush(); + success = true; + + return new FlushedSegment(newSegment, segmentDeletes, flushState.deletedDocs); + } finally { + if (!success) { + if (segment != null) { + synchronized(parent.indexWriter) { + parent.indexWriter.deleter.refresh(segment); + } + } + abort(); + } + } + } + + /** Get current segment name we are writing. 
*/ + String getSegment() { + return segment; + } + + long bytesUsed() { + return bytesUsed.get() + pendingDeletes.bytesUsed.get(); + } + + FieldInfos getFieldInfos() { + return fieldInfos; + } + + void message(String message) { + writer.message("DWPT: " + message); + } + + /* Initial chunks size of the shared byte[] blocks used to + store postings data */ + final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; + + /* if you increase this, you must fix field cache impl for + * getTerms/getTermsIndex requires <= 32768 */ + final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; + + /* Initial chunks size of the shared int[] blocks used to + store postings data */ + final static int INT_BLOCK_SHIFT = 13; + final static int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT; + final static int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1; + + /* Allocate another int[] from the shared pool */ + int[] getIntBlock() { + int[] b = new int[INT_BLOCK_SIZE]; + bytesUsed.addAndGet(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT); + return b; + } + + void recycleIntBlocks(int[][] blocks, int offset, int length) { + bytesUsed.addAndGet(-(length *(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT))); + } + + final Allocator byteBlockAllocator = new DirectTrackingAllocator(); + + + private class DirectTrackingAllocator extends Allocator { + public DirectTrackingAllocator() { + this(BYTE_BLOCK_SIZE); + } + + public DirectTrackingAllocator(int blockSize) { + super(blockSize); + } + + public byte[] getByteBlock() { + bytesUsed.addAndGet(blockSize); + return new byte[blockSize]; + } + @Override + public void recycleByteBlocks(byte[][] blocks, int start, int end) { + bytesUsed.addAndGet(-((end-start)* blockSize)); + for (int i = start; i < end; i++) { + blocks[i] = null; + } + } + + }; +} Property changes on: lucene\src\java\org\apache\lucene\index\DocumentsWriterPerThread.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/FieldsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldsWriter.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/FieldsWriter.java (working copy) @@ -2,13 +2,13 @@ /** * Copyright 2004 The Apache Software Foundation - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the @@ -22,15 +22,14 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMOutputStream; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.IOUtils; final class FieldsWriter { static final byte FIELD_IS_TOKENIZED = 0x1; static final byte FIELD_IS_BINARY = 0x2; - + // Lucene 3.0: Removal of compressed fields static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; @@ -38,7 +37,7 @@ // than the current one, and always change this if you // switch to a new format! 
static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; - + // when removing support for old versions, leave the last supported version here static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; @@ -83,10 +82,9 @@ // and adds a new entry for this document into the index // stream. This assumes the buffer was already written // in the correct fields format. - void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException { + void startDocument(int numStoredFields) throws IOException { indexStream.writeLong(fieldsStream.getFilePointer()); fieldsStream.writeVInt(numStoredFields); - buffer.writeTo(fieldsStream); } void skipDocument() throws IOException { @@ -121,8 +119,8 @@ } } - final void writeField(FieldInfo fi, Fieldable field) throws IOException { - fieldsStream.writeVInt(fi.number); + final void writeField(int fieldNumber, Fieldable field) throws IOException { + fieldsStream.writeVInt(fieldNumber); byte bits = 0; if (field.isTokenized()) bits |= FieldsWriter.FIELD_IS_TOKENIZED; @@ -175,10 +173,9 @@ fieldsStream.writeVInt(storedCount); - for (Fieldable field : fields) { if (field.isStored()) - writeField(fieldInfos.fieldInfo(field.name()), field); + writeField(fieldInfos.fieldNumber(field.name()), field); } } } Index: lucene/src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -37,14 +37,14 @@ /** * Information about a segment such as it's name, directory, and files related * to the segment. - * + * * @lucene.experimental */ public final class SegmentInfo { static final int NO = -1; // e.g. no norms; no deletes; static final int YES = 1; // e.g. have norms; have deletes; - static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. + static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. public String name; // unique name in dir public int docCount; // number of docs in seg @@ -56,7 +56,7 @@ * - YES or higher if there are deletes at generation N */ private long delGen; - + /* * Current generation of each field's norm file. If this array is null, * means no separate norms. If this array is not null, its values mean: @@ -65,7 +65,7 @@ */ private Map normGen; - private boolean isCompoundFile; + private boolean isCompoundFile; private volatile List files; // cached list of files that this segment uses // in the Directory @@ -91,9 +91,9 @@ private Map diagnostics; - // Tracks the Lucene version this segment was created with, since 3.1. Null + // Tracks the Lucene version this segment was created with, since 3.1. Null // indicates an older than 3.0 index, and it's used to detect a too old index. - // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and + // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and // specific versions afterwards ("3.0", "3.1" etc.). // see Constants.LUCENE_MAIN_VERSION. 
private String version; @@ -101,7 +101,7 @@ // NOTE: only used in-RAM by IW to track buffered deletes; // this is never written to/read from the Directory private long bufferedDeletesGen; - + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) { this.name = name; @@ -182,11 +182,13 @@ docStoreSegment = name; docStoreIsCompoundFile = false; } + if (format > DefaultSegmentInfosWriter.FORMAT_4_0) { // pre-4.0 indexes write a byte if there is a single norms file byte b = input.readByte(); assert 1 == b; } + int numNormGen = input.readInt(); if (numNormGen == NO) { normGen = null; @@ -207,7 +209,7 @@ assert delCount <= docCount; hasProx = input.readByte() == YES; - + // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name); if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) { segmentCodecs = new SegmentCodecs(codecs, input); @@ -217,7 +219,7 @@ segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")}); } diagnostics = input.readStringStringMap(); - + if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) { hasVectors = input.readByte() == 1; } else { @@ -366,7 +368,7 @@ // against this segment return null; } else { - return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); + return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); } } @@ -432,7 +434,7 @@ if (hasSeparateNorms(number)) { return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number)); } else { - // single file for all norms + // single file for all norms return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN); } } @@ -468,36 +470,36 @@ public int getDocStoreOffset() { return docStoreOffset; } - + public boolean getDocStoreIsCompoundFile() { return docStoreIsCompoundFile; } - - void setDocStoreIsCompoundFile(boolean v) { - docStoreIsCompoundFile = v; + + public void setDocStoreIsCompoundFile(boolean docStoreIsCompoundFile) { + this.docStoreIsCompoundFile = docStoreIsCompoundFile; clearFilesCache(); } - + + void setDocStore(int offset, String segment, boolean isCompoundFile) { + docStoreOffset = offset; + docStoreSegment = segment; + docStoreIsCompoundFile = isCompoundFile; + clearFilesCache(); + } + public String getDocStoreSegment() { return docStoreSegment; } - - public void setDocStoreSegment(String segment) { - docStoreSegment = segment; - } - + void setDocStoreOffset(int offset) { docStoreOffset = offset; clearFilesCache(); } - void setDocStore(int offset, String segment, boolean isCompoundFile) { - docStoreOffset = offset; - docStoreSegment = segment; - docStoreIsCompoundFile = isCompoundFile; - clearFilesCache(); + public void setDocStoreSegment(String docStoreSegment) { + this.docStoreSegment = docStoreSegment; } - + /** Save this segment's info. */ public void write(IndexOutput output) throws IOException { @@ -522,7 +524,7 @@ output.writeLong(entry.getValue()); } } - + output.writeByte((byte) (isCompoundFile ? YES : NO)); output.writeInt(delCount); output.writeByte((byte) (hasProx ? 
1:0)); @@ -570,9 +572,9 @@ // Already cached: return files; } - + Set fileSet = new HashSet(); - + boolean useCompoundFile = getUseCompoundFile(); if (useCompoundFile) { @@ -606,7 +608,7 @@ fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION)); fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION)); - } + } } String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); @@ -644,7 +646,7 @@ } /** Used for debugging. Format may suddenly change. - * + * *

Current format looks like * _a(3.1):c45/4->_1, which means the segment's * name is _a; it was created with Lucene 3.1 (or @@ -674,7 +676,7 @@ if (delCount != 0) { s.append('/').append(delCount); } - + if (docStoreOffset != -1) { s.append("->").append(docStoreSegment); if (docStoreIsCompoundFile) { @@ -714,13 +716,13 @@ * NOTE: this method is used for internal purposes only - you should * not modify the version of a SegmentInfo, or it may result in unexpected * exceptions thrown when you attempt to open the index. - * + * * @lucene.internal */ public void setVersion(String version) { this.version = version; } - + /** Returns the version of the code which wrote the segment. */ public String getVersion() { return version; Index: lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (working copy) @@ -81,6 +81,6 @@ } public int getAddress() { - return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); + return upto + (offset0 & DocumentsWriterPerThread.BYTE_BLOCK_NOT_MASK); } } \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (working copy) @@ -17,21 +17,23 @@ * limitations under the License. */ -import java.util.Collection; +import java.io.IOException; import java.util.Map; -import java.io.IOException; abstract class InvertedDocConsumer { - /** Add a new thread */ - abstract InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread); - /** Abort (called after hitting AbortException) */ abstract void abort(); /** Flush a new segment */ - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; + abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + + abstract void startDocument() throws IOException; + + abstract void finishDocument() throws IOException; + /** Attempt to free RAM, returning true if any RAM was * freed */ abstract boolean freeRAM(); Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (working copy) @@ -18,22 +18,25 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.Map; abstract class DocFieldConsumer { - /** Called when DocumentsWriter decides to create a new + /** Called when DocumentsWriterPerThread decides to create a new * segment */ - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; /** Called when an aborting exception is hit */ abstract void abort(); - /** Add a new thread */ - abstract DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException; - - /** Called when DocumentsWriter is using too much RAM. 
+ /** Called when DocumentsWriterPerThread is using too much RAM. * The consumer should free RAM, if possible, returning * true if any RAM was in fact freed. */ abstract boolean freeRAM(); - } + + abstract void startDocument() throws IOException; + + abstract DocFieldConsumerPerField addField(FieldInfo fi); + + abstract void finishDocument() throws IOException; + +} Index: lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (revision 0) @@ -0,0 +1,396 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.lucene.search.Query; + +/** + * {@link DocumentsWriterDeleteQueue} is a non-blocking linked pending deletes + * queue. In contrast to other queue implementation we only maintain the + * tail of the queue. A delete queue is always used in a context of a set of + * DWPTs and a global delete pool. Each of the DWPT and the global pool need to + * maintain their 'own' head of the queue (as a DeleteSlice instance per DWPT). + * The difference between the DWPT and the global pool is that the DWPT starts + * maintaining a head once it has added its first document since for its segments + * private deletes only the deletes after that document are relevant. The global + * pool instead starts maintaining the head once this instance is created by + * taking the sentinel instance as its initial head. + *

+ * Since each {@link DeleteSlice} maintains its own head and the list is only + * single linked the garbage collector takes care of pruning the list for us. + * All nodes in the list that are still relevant should be either directly or + * indirectly referenced by one of the DWPT's private {@link DeleteSlice} or by + * the global {@link BufferedDeletes} slice. + *

+ * Each DWPT as well as the global delete pool maintain their private + * DeleteSlice instance. In the DWPT case updating a slice is equivalent to + * atomically finishing the document. The slice update guarantees a "happens + * before" relationship to all other updates in the same indexing session. When a + * DWPT updates a document it: + * + *

+ * 1. consumes a document and finishes its processing
+ * 2. updates its private {@link DeleteSlice} either by calling
+ *    {@link #updateSlice(DeleteSlice)} or {@link #add(Term, DeleteSlice)} (if the
+ *    document has a delTerm)
+ * 3. applies all deletes in the slice to its private {@link BufferedDeletes}
+ *    and resets it
+ * 4. increments its internal document id
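To make the sequence above concrete, here is a minimal sketch of how a DWPT could drive it against the package-private APIs added in this patch (DocumentsWriterDeleteQueue, DeleteSlice, BufferedDeletes). This is illustration only, not code from the patch; delTerm, numDocsInRAM and privateDeletes stand in for the DWPT's own state, and the snippet assumes it lives in org.apache.lucene.index:

    DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
    DocumentsWriterDeleteQueue.DeleteSlice slice = queue.newSlice(); // this DWPT's private head
    BufferedDeletes privateDeletes = new BufferedDeletes(false);     // this DWPT's pending deletes
    int numDocsInRAM = 0;

    // 1. the document has just been consumed and processed; delTerm may be null
    Term delTerm = new Term("id", "42");

    // 2. update the private slice; add() also publishes the delete at the queue tail
    if (delTerm != null) {
      queue.add(delTerm, slice);
    } else {
      queue.updateSlice(slice);
    }

    // 3. apply all deletes in the slice to the private buffer; apply() resets the slice
    slice.apply(privateDeletes, numDocsInRAM);

    // 4. increment the internal document id
    numDocsInRAM++;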
+ * + * The DWPT also doesn't apply its current documents delete term until it has + * updated its delete slice which ensures the consistency of the update. If the + * update fails before the DeleteSlice could have been updated the deleteTerm + * will also not be added to its private deletes neither to the global deletes. + * + */ +final class DocumentsWriterDeleteQueue { + + private volatile Node tail; + + private static final AtomicReferenceFieldUpdater tailUpdater = AtomicReferenceFieldUpdater + .newUpdater(DocumentsWriterDeleteQueue.class, Node.class, "tail"); + + private final DeleteSlice globalSlice; + private final BufferedDeletes globalBufferedDeletes; + /* only acquired to update the global deletes */ + private final ReentrantLock globalBufferLock = new ReentrantLock(); + + final long generation; + + DocumentsWriterDeleteQueue() { + this(0); + } + + DocumentsWriterDeleteQueue(long generation) { + this(new BufferedDeletes(false), generation); + } + + DocumentsWriterDeleteQueue(BufferedDeletes globalBufferedDeletes, long generation) { + this.globalBufferedDeletes = globalBufferedDeletes; + this.generation = generation; + /* + * we use a sentinel instance as our initial tail. No slice will ever try to + * apply this tail since the head is always omitted. + */ + tail = new Node(null); // sentinel + globalSlice = new DeleteSlice(tail); + } + + void addDelete(Query... queries) { + add(new QueryArrayNode(queries)); + tryApplyGlobalSlice(); + } + + void addDelete(Term... terms) { + add(new TermArrayNode(terms)); + tryApplyGlobalSlice(); + } + + /** + * invariant for document update + */ + void add(Term term, DeleteSlice slice) { + final TermNode termNode = new TermNode(term); + add(termNode); + /* + * this is an update request where the term is the updated documents + * delTerm. in that case we need to guarantee that this insert is atomic + * with regards to the given delete slice. This means if two threads try to + * update the same document with in turn the same delTerm one of them must + * win. By taking the node we have created for our del term as the new tail + * it is guaranteed that if another thread adds the same right after us we + * will apply this delete next time we update our slice and one of the two + * competing updates wins! + */ + slice.sliceTail = termNode; + assert slice.sliceHead != slice.sliceTail : "slice head and tail must differ after add"; + tryApplyGlobalSlice(); // TODO doing this each time is not necessary maybe + // we can do it just every n times or so? + } + + void add(Node item) { + /* + * this non-blocking / 'wait-free' linked list add was inspired by Apache + * Harmony's ConcurrentLinkedQueue Implementation. + */ + while (true) { + final Node currentTail = this.tail; + final Node tailNext = currentTail.next; + if (tail == currentTail) { + if (tailNext != null) { + /* + * we are in intermediate state here. the tails next pointer has been + * advanced but the tail itself might not be updated yet. help to + * advance the tail and try again updating it. 
+ */ + tailUpdater.compareAndSet(this, currentTail, tailNext); // can fail + } else { + /* + * we are in quiescent state and can try to insert the item to the + * current tail if we fail to insert we just retry the operation since + * somebody else has already added its item + */ + if (currentTail.casNext(null, item)) { + /* + * now that we are done we need to advance the tail while another + * thread could have advanced it already so we can ignore the return + * type of this CAS call + */ + tailUpdater.compareAndSet(this, currentTail, item); + return; + } + } + } + } + } + + boolean anyChanges() { + globalBufferLock.lock(); + try { + return !globalSlice.isEmpty() || globalBufferedDeletes.any(); + } finally { + globalBufferLock.unlock(); + } + } + + void tryApplyGlobalSlice() { + if (globalBufferLock.tryLock()) { + /* + * The global buffer must be locked but we don't need to upate them if + * there is an update going on right now. It is sufficient to apply the + * deletes that have been added after the current in-flight global slices + * tail the next time we can get the lock! + */ + try { + if (updateSlice(globalSlice)) { + globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + } + } finally { + globalBufferLock.unlock(); + } + } + } + + FrozenBufferedDeletes freezeGlobalBuffer(DeleteSlice callerSlice) { + globalBufferLock.lock(); + /* + * Here we freeze the global buffer so we need to lock it, apply all + * deletes in the queue and reset the global slice to let the GC prune the + * queue. + */ + final Node currentTail = tail; // take the current tail make this local any + // Changes after this call are applied later + // and not relevant here + if (callerSlice != null) { + // Update the callers slices so we are on the same page + callerSlice.sliceTail = currentTail; + } + try { + if (globalSlice.sliceTail != currentTail) { + globalSlice.sliceTail = currentTail; + globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + } + + final FrozenBufferedDeletes packet = new FrozenBufferedDeletes( + globalBufferedDeletes, false); + globalBufferedDeletes.clear(); + return packet; + } finally { + globalBufferLock.unlock(); + } + } + + DeleteSlice newSlice() { + return new DeleteSlice(tail); + } + + boolean updateSlice(DeleteSlice slice) { + if (slice.sliceTail != tail) { // If we are the same just + slice.sliceTail = tail; + return true; + } + return false; + } + + static class DeleteSlice { + // No need to be volatile, slices are thread captive (only accessed by one thread)! + Node sliceHead; // we don't apply this one + Node sliceTail; + + DeleteSlice(Node currentTail) { + assert currentTail != null; + /* + * Initially this is a 0 length slice pointing to the 'current' tail of + * the queue. Once we update the slice we only need to assign the tail and + * have a new slice + */ + sliceHead = sliceTail = currentTail; + } + + void apply(BufferedDeletes del, int docIDUpto) { + if (sliceHead == sliceTail) { + // 0 length slice + return; + } + /* + * When we apply a slice we take the head and get its next as our first + * item to apply and continue until we applied the tail. If the head and + * tail in this slice are not equal then there will be at least one more + * non-null node in the slice! 
+ */ + Node current = sliceHead; + do { + current = current.next; + assert current != null : "slice property violated between the head on the tail must not be a null node"; + current.apply(del, docIDUpto); + } while (current != sliceTail); + reset(); + } + + void reset() { + // Reset to a 0 length slice + sliceHead = sliceTail; + } + + /** + * Returns true iff the given item is identical to the item + * hold by the slices tail, otherwise false. + */ + boolean isTailItem(Object item) { + return sliceTail.item == item; + } + + boolean isEmpty() { + return sliceHead == sliceTail; + } + } + + public int numGlobalTermDeletes() { + return globalBufferedDeletes.numTermDeletes.get(); + } + + void clear() { + globalBufferLock.lock(); + try { + final Node currentTail = tail; + globalSlice.sliceHead = globalSlice.sliceTail = currentTail; + globalBufferedDeletes.clear(); + } finally { + globalBufferLock.unlock(); + } + } + + private static class Node { + volatile Node next; + final Object item; + + private Node(Object item) { + this.item = item; + } + + static final AtomicReferenceFieldUpdater nextUpdater = AtomicReferenceFieldUpdater + .newUpdater(Node.class, Node.class, "next"); + + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + assert false : "sentinel item must never be applied"; + } + + boolean casNext(Node cmp, Node val) { + return nextUpdater.compareAndSet(this, cmp, val); + } + } + + private static final class TermNode extends Node { + + TermNode(Term term) { + super(term); + } + + @Override + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + bufferedDeletes.addTerm((Term) item, docIDUpto); + } + } + + private static final class QueryArrayNode extends Node { + QueryArrayNode(Query[] query) { + super(query); + } + + @Override + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + final Query[] queries = (Query[]) item; + for (Query query : queries) { + bufferedDeletes.addQuery(query, docIDUpto); + } + } + } + + private static final class TermArrayNode extends Node { + TermArrayNode(Term[] term) { + super(term); + } + + @Override + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + final Term[] terms = (Term[]) item; + for (Term term : terms) { + bufferedDeletes.addTerm(term, docIDUpto); + } + } + } + + + private boolean forceApplyGlobalSlice() { + globalBufferLock.lock(); + final Node currentTail = tail; + try { + if (globalSlice.sliceTail != currentTail) { + globalSlice.sliceTail = currentTail; + globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + } + return globalBufferedDeletes.any(); + } finally { + globalBufferLock.unlock(); + } + } + + public int getBufferedDeleteTermsSize() { + globalBufferLock.lock(); + try { + forceApplyGlobalSlice(); + return globalBufferedDeletes.terms.size(); + } finally { + globalBufferLock.unlock(); + } + } + + public long bytesUsed() { + return globalBufferedDeletes.bytesUsed.get(); + } + + @Override + public String toString() { + return "DWDQ: [ generation: " + generation + " ]"; + } + + +} Property changes on: lucene\src\java\org\apache\lucene\index\DocumentsWriterDeleteQueue.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (working 
copy) @@ -24,4 +24,5 @@ /** Processes all occurrences of a single field */ abstract void processFields(Fieldable[] fields, int count) throws IOException; abstract void abort(); + abstract FieldInfo getFieldInfo(); } Index: lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java =================================================================== --- lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (working copy) @@ -33,8 +33,8 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; -/* Tracks the stream of {@link BuffereDeletes}. - * When DocumensWriter flushes, its buffered +/* Tracks the stream of {@link BufferedDeletes}. + * When DocumentsWriterPerThread flushes, its buffered * deletes are appended to this stream. We later * apply these deletes (resolve them to the actual * docIDs, per segment) when a merge is started @@ -60,7 +60,7 @@ // used only by assert private Term lastDeleteTerm; - + private PrintStream infoStream; private final AtomicLong bytesUsed = new AtomicLong(); private final AtomicInteger numTerms = new AtomicInteger(); @@ -75,26 +75,36 @@ infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); } } - + public synchronized void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; } // Appends a new packet of buffered deletes to the stream, // setting its generation: - public synchronized void push(FrozenBufferedDeletes packet) { + public synchronized long push(FrozenBufferedDeletes packet) { + /* + * The insert operation must be atomic. If we let threads increment the gen + * and push the packet afterwards we risk that packets are out of order. + * With DWPT this is possible if two or more flushes are racing for pushing + * updates. If the pushed packets get our of order would loose documents + * since deletes are applied to the wrong segments. + */ + packet.setDelGen(nextGen++); assert packet.any(); - assert checkDeleteStats(); - assert packet.gen < nextGen; + assert checkDeleteStats(); + assert packet.delGen() < nextGen; + assert deletes.isEmpty() || deletes.get(deletes.size()-1).delGen() < packet.delGen() : "Delete packets must be in order"; deletes.add(packet); numTerms.addAndGet(packet.numTermDeletes); bytesUsed.addAndGet(packet.bytesUsed); if (infoStream != null) { - message("push deletes " + packet + " delGen=" + packet.gen + " packetCount=" + deletes.size()); + message("push deletes " + packet + " delGen=" + packet.delGen() + " packetCount=" + deletes.size()); } - assert checkDeleteStats(); + assert checkDeleteStats(); + return packet.delGen(); } - + public synchronized void clear() { deletes.clear(); nextGen = 1; @@ -132,7 +142,7 @@ } // Sorts SegmentInfos from smallest to biggest bufferedDelGen: - private static final Comparator sortByDelGen = new Comparator() { + private static final Comparator sortSegInfoByDelGen = new Comparator() { // @Override -- not until Java 1.6 public int compare(SegmentInfo si1, SegmentInfo si2) { final long cmp = si1.getBufferedDeletesGen() - si2.getBufferedDeletesGen(); @@ -147,10 +157,10 @@ @Override public boolean equals(Object other) { - return sortByDelGen == other; + return sortSegInfoByDelGen == other; } }; - + /** Resolves the buffered deleted Term/Query/docIDs, into * actual deleted docIDs in the deletedDocs BitVector for * each SegmentReader. 
*/ @@ -174,7 +184,7 @@ SegmentInfos infos2 = new SegmentInfos(); infos2.addAll(infos); - Collections.sort(infos2, sortByDelGen); + Collections.sort(infos2, sortSegInfoByDelGen); BufferedDeletes coalescedDeletes = null; boolean anyNewDeletes = false; @@ -191,19 +201,30 @@ final SegmentInfo info = infos2.get(infosIDX); final long segGen = info.getBufferedDeletesGen(); - if (packet != null && segGen < packet.gen) { + if (packet != null && segGen < packet.delGen()) { //System.out.println(" coalesce"); if (coalescedDeletes == null) { coalescedDeletes = new BufferedDeletes(true); } - coalescedDeletes.update(packet); + if (!packet.isSegmentPrivate) { + /* + * Only coalesce if we are NOT on a segment private del packet: the segment private del packet + * must only applied to segments with the same delGen. Yet, if a segment is already deleted + * from the SI since it had no more documents remaining after some del packets younger than + * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been + * removed. + */ + coalescedDeletes.update(packet); + } + delIDX--; - } else if (packet != null && segGen == packet.gen) { + } else if (packet != null && segGen == packet.delGen()) { + assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet"; //System.out.println(" eq"); // Lock order: IW -> BD -> RP assert readerPool.infoIsLive(info); - SegmentReader reader = readerPool.get(info, false); + final SegmentReader reader = readerPool.get(info, false); int delCount = 0; final boolean segAllDeletes; try { @@ -213,7 +234,7 @@ delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader); } //System.out.println(" del exact"); - // Don't delete by Term here; DocumentsWriter + // Don't delete by Term here; DocumentsWriterPerThread // already did that on flush: delCount += applyQueryDeletes(packet.queriesIterable(), reader); segAllDeletes = reader.numDocs() == 0; @@ -236,7 +257,12 @@ if (coalescedDeletes == null) { coalescedDeletes = new BufferedDeletes(true); } - coalescedDeletes.update(packet); + + /* + * Since we are on a segment private del packet we must not + * update the coalescedDeletes here! We can simply advance to the + * next packet and seginfo. 
+ */ delIDX--; infosIDX--; info.setBufferedDeletesGen(nextGen); @@ -281,11 +307,11 @@ message("applyDeletes took " + (System.currentTimeMillis()-t0) + " msec"); } // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any; - + return new ApplyDeletesResult(anyNewDeletes, nextGen++, allDeleted); } - public synchronized long getNextGen() { + synchronized long getNextGen() { return nextGen++; } @@ -303,10 +329,9 @@ if (infoStream != null) { message("prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + deletes.size()); } - final int limit = deletes.size(); for(int delIDX=0;delIDX= minGen) { + if (deletes.get(delIDX).delGen() >= minGen) { prune(delIDX); assert checkDeleteStats(); return; @@ -345,10 +370,10 @@ } TermsEnum termsEnum = null; - + String currentField = null; DocsEnum docs = null; - + assert checkDeleteTerm(null); for (Term term : termsIter) { @@ -372,10 +397,10 @@ assert checkDeleteTerm(term); // System.out.println(" term=" + term); - + if (termsEnum.seek(term.bytes(), false) == TermsEnum.SeekStatus.FOUND) { DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs); - + if (docsEnum != null) { while (true) { final int docID = docsEnum.nextDoc(); @@ -401,7 +426,7 @@ public final Query query; public final int limit; public QueryAndLimit(Query query, int limit) { - this.query = query; + this.query = query; this.limit = limit; } } @@ -449,7 +474,7 @@ lastDeleteTerm = term; return true; } - + // only for assert private boolean checkDeleteStats() { int numTerms2 = 0; Index: lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -1,307 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Comparator; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.io.IOException; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -/** - * Gathers all Fieldables for a document under the same - * name, updates FieldInfos, and calls per-field consumers - * to process field by field. - * - * Currently, only a single thread visits the fields, - * sequentially, for processing. 
- */ - -final class DocFieldProcessorPerThread extends DocConsumerPerThread { - - float docBoost; - int fieldGen; - final DocFieldProcessor docFieldProcessor; - final DocFieldConsumerPerThread consumer; - - // Holds all fields seen in current doc - DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; - int fieldCount; - - // Hash table for all fields seen in current segment - DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; - int hashMask = 1; - int totalFieldCount; - - final StoredFieldsWriterPerThread fieldsWriter; - - final DocumentsWriter.DocState docState; - - public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException { - this.docState = threadState.docState; - this.docFieldProcessor = docFieldProcessor; - this.consumer = docFieldProcessor.consumer.addThread(this); - fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState); - } - - @Override - public void abort() { - for(int i=0;i fields() { - Collection fields = new HashSet(); - for(int i=0;i fieldHash.length; - - final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize]; - - // Rehash - int newHashMask = newHashSize-1; - for(int j=0;j docFields = doc.getFields(); - final int numDocFields = docFields.size(); - - // Absorb any new fields first seen in this document. - // Also absorb any changes to fields we had already - // seen before (eg suddenly turning on norms or - // vectors, etc.): - - for(int i=0;i= fieldHash.length/2) - rehash(); - } else { - fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), - field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false, field.getOmitTermFreqAndPositions()); - } - if (thisFieldGen != fp.lastGen) { - - // First time we're seeing this field for this doc - fp.fieldCount = 0; - - if (fieldCount == fields.length) { - final int newSize = fields.length*2; - DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize]; - System.arraycopy(fields, 0, newArray, 0, fieldCount); - fields = newArray; - } - - fields[fieldCount++] = fp; - fp.lastGen = thisFieldGen; - } - - if (fp.fieldCount == fp.fields.length) { - Fieldable[] newArray = new Fieldable[fp.fields.length*2]; - System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount); - fp.fields = newArray; - } - - fp.fields[fp.fieldCount++] = field; - if (field.isStored()) { - fieldsWriter.addField(field, fp.fieldInfo); - } - } - - // If we are writing vectors then we must visit - // fields in sorted order so they are written in - // sorted order. TODO: we actually only need to - // sort the subset of fields that have vectors - // enabled; we could save [small amount of] CPU - // here. 
- ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); - - for(int i=0;i fieldsComp = new Comparator() { - public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) { - return o1.fieldInfo.name.compareTo(o2.fieldInfo.name); - } - }; - - PerDoc[] docFreeList = new PerDoc[1]; - int freeCount; - int allocCount; - - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else - return docFreeList[--freeCount]; - } - - synchronized void freePerDoc(PerDoc perDoc) { - assert freeCount < docFreeList.length; - docFreeList[freeCount++] = perDoc; - } - - class PerDoc extends DocumentsWriter.DocWriter { - - DocumentsWriter.DocWriter one; - DocumentsWriter.DocWriter two; - - @Override - public long sizeInBytes() { - return one.sizeInBytes() + two.sizeInBytes(); - } - - @Override - public void finish() throws IOException { - try { - try { - one.finish(); - } finally { - two.finish(); - } - } finally { - freePerDoc(this); - } - } - - @Override - public void abort() { - try { - try { - one.abort(); - } finally { - two.abort(); - } - } finally { - freePerDoc(this); - } - } - } -} \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java (revision 0) @@ -0,0 +1,90 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** This is just a "splitter" class: it lets you wrap two + * DocFieldConsumer instances as a single consumer. 
*/ + +final class DocFieldConsumers extends DocFieldConsumer { + final DocFieldConsumer one; + final DocFieldConsumer two; + final DocumentsWriterPerThread.DocState docState; + + public DocFieldConsumers(DocFieldProcessor processor, DocFieldConsumer one, DocFieldConsumer two) { + this.one = one; + this.two = two; + this.docState = processor.docState; + } + + @Override + public void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { + + Map oneFieldsToFlush = new HashMap(); + Map twoFieldsToFlush = new HashMap(); + + for (Map.Entry fieldToFlush : fieldsToFlush.entrySet()) { + DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldToFlush.getValue(); + oneFieldsToFlush.put(fieldToFlush.getKey(), perField.one); + twoFieldsToFlush.put(fieldToFlush.getKey(), perField.two); + } + + one.flush(oneFieldsToFlush, state); + two.flush(twoFieldsToFlush, state); + } + + @Override + public void abort() { + try { + one.abort(); + } finally { + two.abort(); + } + } + + @Override + public boolean freeRAM() { + boolean any = one.freeRAM(); + any |= two.freeRAM(); + return any; + } + + @Override + public void finishDocument() throws IOException { + try { + one.finishDocument(); + } finally { + two.finishDocument(); + } + } + + @Override + public void startDocument() throws IOException { + one.startDocument(); + two.startDocument(); + } + + @Override + public DocFieldConsumerPerField addField(FieldInfo fi) { + return new DocFieldConsumersPerField(this, fi, one.addField(fi), two.addField(fi)); + } + +} Property changes on: lucene\src\java\org\apache\lucene\index\DocFieldConsumers.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (working copy) @@ -17,12 +17,13 @@ * limitations under the License. 
*/ -import java.util.Collection; +import java.io.IOException; import java.util.Map; -import java.io.IOException; abstract class InvertedDocEndConsumer { - abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread); - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; abstract void abort(); + abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + abstract void startDocument() throws IOException; + abstract void finishDocument() throws IOException; } Index: lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java (working copy) @@ -18,11 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.Map; abstract class TermsHashConsumer { - abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread); - abstract void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException; abstract void abort(); - } + abstract void startDocument() throws IOException; + abstract void finishDocument(TermsHash termsHash) throws IOException; + abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); +} Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java =================================================================== --- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (working copy) @@ -75,7 +75,7 @@ protected double noCFSRatio = DEFAULT_NO_CFS_RATIO; protected boolean calibrateSizeByDeletes = true; - + protected boolean useCompoundFile = true; public LogMergePolicy() { @@ -103,7 +103,7 @@ } this.noCFSRatio = noCFSRatio; } - + protected void message(String message) { if (verbose()) writer.get().message("LMP: " + message); @@ -169,7 +169,7 @@ this.calibrateSizeByDeletes = calibrateSizeByDeletes; } - /** Returns true if the segment size should be calibrated + /** Returns true if the segment size should be calibrated * by the number of deletes when choosing segments for merge. */ public boolean getCalibrateSizeByDeletes() { return calibrateSizeByDeletes; @@ -189,7 +189,7 @@ return info.docCount; } } - + protected long sizeBytes(SegmentInfo info) throws IOException { long byteSize = info.sizeInBytes(true); if (calibrateSizeByDeletes) { @@ -201,7 +201,7 @@ return byteSize; } } - + protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set segmentsToOptimize) throws IOException { final int numSegments = infos.size(); int numToOptimize = 0; @@ -273,7 +273,7 @@ return spec.merges.size() == 0 ? null : spec; } - + /** * Returns the merges necessary to optimize the index. 
This method constraints * the returned merges only by the {@code maxNumSegments} parameter, and @@ -281,7 +281,7 @@ */ private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException { MergeSpecification spec = new MergeSpecification(); - + // First, enroll all "full" merges (size // mergeFactor) to potentially be run concurrently: while (last - maxNumSegments + 1 >= mergeFactor) { @@ -331,7 +331,7 @@ } return spec.merges.size() == 0 ? null : spec; } - + /** Returns the merges necessary to optimize the index. * This merge policy defines "optimized" to mean only the * requested number of segments is left in the index, and @@ -379,7 +379,7 @@ } return null; } - + // There is only one segment already, and it is optimized if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) { if (verbose()) { @@ -397,7 +397,7 @@ break; } } - + if (anyTooLarge) { return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last); } else { @@ -409,7 +409,7 @@ * Finds merges necessary to expunge all deletes from the * index. We simply merge adjacent segments that have * deletes, up to mergeFactor at a time. - */ + */ @Override public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) throws CorruptIndexException, IOException { @@ -462,7 +462,7 @@ SegmentInfo info; float level; int index; - + public SegmentInfoAndLevel(SegmentInfo info, float level, int index) { this.info = info; this.level = level; @@ -658,5 +658,5 @@ sb.append("]"); return sb.toString(); } - + } Index: lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (working copy) @@ -1,79 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.document.Fieldable; - -final class StoredFieldsWriterPerThread { - - final FieldsWriter localFieldsWriter; - final StoredFieldsWriter storedFieldsWriter; - final DocumentsWriter.DocState docState; - - StoredFieldsWriter.PerDoc doc; - - public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException { - this.storedFieldsWriter = storedFieldsWriter; - this.docState = docState; - localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null); - } - - public void startDocument() { - if (doc != null) { - // Only happens if previous document hit non-aborting - // exception while writing stored fields into - // localFieldsWriter: - doc.reset(); - doc.docID = docState.docID; - } - } - - public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { - if (doc == null) { - doc = storedFieldsWriter.getPerDoc(); - doc.docID = docState.docID; - localFieldsWriter.setFieldsStream(doc.fdt); - assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields; - assert 0 == doc.fdt.length(); - assert 0 == doc.fdt.getFilePointer(); - } - - localFieldsWriter.writeField(fieldInfo, field); - assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); - doc.numStoredFields++; - } - - public DocumentsWriter.DocWriter finishDocument() { - // If there were any stored fields in this doc, doc will - // be non-null; else it's null. - try { - return doc; - } finally { - doc = null; - } - } - - public void abort() { - if (doc != null) { - doc.abort(); - doc = null; - } - } -} Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java (revision 0) @@ -0,0 +1,56 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import org.apache.lucene.document.Fieldable; + +final class DocFieldConsumersPerField extends DocFieldConsumerPerField { + + final DocFieldConsumerPerField one; + final DocFieldConsumerPerField two; + final DocFieldConsumers parent; + final FieldInfo fieldInfo; + + public DocFieldConsumersPerField(DocFieldConsumers parent, FieldInfo fi, DocFieldConsumerPerField one, DocFieldConsumerPerField two) { + this.parent = parent; + this.one = one; + this.two = two; + this.fieldInfo = fi; + } + + @Override + public void processFields(Fieldable[] fields, int count) throws IOException { + one.processFields(fields, count); + two.processFields(fields, count); + } + + @Override + public void abort() { + try { + one.abort(); + } finally { + two.abort(); + } + } + + @Override + FieldInfo getFieldInfo() { + return fieldInfo; + } +} Property changes on: lucene\src\java\org\apache\lucene\index\DocFieldConsumersPerField.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (working copy) @@ -18,7 +18,8 @@ */ import java.io.IOException; -import org.apache.lucene.store.RAMOutputStream; + +import org.apache.lucene.document.Fieldable; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -26,22 +27,38 @@ final class StoredFieldsWriter { FieldsWriter fieldsWriter; - final DocumentsWriter docWriter; + final DocumentsWriterPerThread docWriter; int lastDocID; - PerDoc[] docFreeList = new PerDoc[1]; int freeCount; - public StoredFieldsWriter(DocumentsWriter docWriter) { + final DocumentsWriterPerThread.DocState docState; + + public StoredFieldsWriter(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; + this.docState = docWriter.docState; } - public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException { - return new StoredFieldsWriterPerThread(docState, this); + private int numStoredFields; + private Fieldable[] storedFields; + private int[] fieldNumbers; + + public void reset() { + numStoredFields = 0; + storedFields = new Fieldable[1]; + fieldNumbers = new int[1]; } - synchronized public void flush(SegmentWriteState state) throws IOException { - if (state.numDocs > lastDocID) { + public void startDocument() { + reset(); + } + + public void flush(SegmentWriteState state) throws IOException { + + if (state.numDocs > 0) { + // It's possible that all documents seen in this segment + // hit non-aborting exceptions, in which case we will + // not have yet init'd the FieldsWriter: initFieldsWriter(); fill(state.numDocs); } @@ -67,23 +84,9 @@ int allocCount; - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else { - return docFreeList[--freeCount]; - } - } + void abort() { + reset(); - synchronized void abort() { if (fieldsWriter != null) { fieldsWriter.abort(); fieldsWriter = null; @@ -101,53 +104,40 @@ 
} } - synchronized void finishDocument(PerDoc perDoc) throws IOException { + void finishDocument() throws IOException { assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start"); + initFieldsWriter(); + fill(docState.docID); - fill(perDoc.docID); + if (fieldsWriter != null && numStoredFields > 0) { + fieldsWriter.startDocument(numStoredFields); + for (int i = 0; i < numStoredFields; i++) { + fieldsWriter.writeField(fieldNumbers[i], storedFields[i]); + } + lastDocID++; + } - // Append stored fields to the real FieldsWriter: - fieldsWriter.flushDocument(perDoc.numStoredFields, perDoc.fdt); - lastDocID++; - perDoc.reset(); - free(perDoc); + reset(); assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end"); } - synchronized void free(PerDoc perDoc) { - assert freeCount < docFreeList.length; - assert 0 == perDoc.numStoredFields; - assert 0 == perDoc.fdt.length(); - assert 0 == perDoc.fdt.getFilePointer(); - docFreeList[freeCount++] = perDoc; - } - - class PerDoc extends DocumentsWriter.DocWriter { - final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer(); - RAMOutputStream fdt = new RAMOutputStream(buffer); - int numStoredFields; - - void reset() { - fdt.reset(); - buffer.recycle(); - numStoredFields = 0; + public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { + if (numStoredFields == storedFields.length) { + int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + Fieldable[] newArray = new Fieldable[newSize]; + System.arraycopy(storedFields, 0, newArray, 0, numStoredFields); + storedFields = newArray; } - @Override - void abort() { - reset(); - free(this); + if (numStoredFields == fieldNumbers.length) { + fieldNumbers = ArrayUtil.grow(fieldNumbers); } - @Override - public long sizeInBytes() { - return buffer.getSizeInBytes(); - } + storedFields[numStoredFields] = field; + fieldNumbers[numStoredFields] = fieldInfo.number; + numStoredFields++; - @Override - public void finish() throws IOException { - finishDocument(this); - } + assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); } } Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -abstract class DocFieldConsumerPerThread { - abstract void startDocument() throws IOException; - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract DocFieldConsumerPerField addField(FieldInfo fi); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/IntBlockPool.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IntBlockPool.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/IntBlockPool.java (working copy) @@ -1,5 +1,7 @@ package org.apache.lucene.index; +import java.util.Arrays; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -22,24 +24,24 @@ public int[][] buffers = new int[10][]; int bufferUpto = -1; // Which buffer we are upto - public int intUpto = DocumentsWriter.INT_BLOCK_SIZE; // Where we are in head buffer + public int intUpto = DocumentsWriterPerThread.INT_BLOCK_SIZE; // Where we are in head buffer public int[] buffer; // Current head buffer - public int intOffset = -DocumentsWriter.INT_BLOCK_SIZE; // Current head offset + public int intOffset = -DocumentsWriterPerThread.INT_BLOCK_SIZE; // Current head offset - final private DocumentsWriter docWriter; + final private DocumentsWriterPerThread docWriter; - public IntBlockPool(DocumentsWriter docWriter) { + public IntBlockPool(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; } public void reset() { if (bufferUpto != -1) { - if (bufferUpto > 0) - // Recycle all but the first buffer - docWriter.recycleIntBlocks(buffers, 1, 1+bufferUpto); - // Reuse first buffer + if (bufferUpto > 0) { + docWriter.recycleIntBlocks(buffers, 1, bufferUpto-1); + Arrays.fill(buffers, 1, bufferUpto, null); + } bufferUpto = 0; intUpto = 0; intOffset = 0; @@ -57,7 +59,7 @@ bufferUpto++; intUpto = 0; - intOffset += DocumentsWriter.INT_BLOCK_SIZE; + intOffset += DocumentsWriterPerThread.INT_BLOCK_SIZE; } } Index: lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java =================================================================== --- lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java (revision 0) @@ -0,0 +1,88 @@ +package org.apache.lucene.index; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.lucene.document.Document; + +/** + * A {@link DocumentsWriterPerThreadPool} implementation that tries to assign an + * indexing thread to the same {@link ThreadState} each time the thread tries to + * obtain a {@link ThreadState}. Once a new {@link ThreadState} is created it is + * associated with the creating thread. Subsequently, if the threads associated + * {@link ThreadState} is not in use it will be associated with the requesting + * thread. Otherwise, if the {@link ThreadState} is used by another thread + * {@link ThreadAffinityDocumentsWriterThreadPool} tries to find the currently + * minimal contended {@link ThreadState}. + */ +public class ThreadAffinityDocumentsWriterThreadPool extends DocumentsWriterPerThreadPool { + private Map threadBindings = new ConcurrentHashMap(); + + public ThreadAffinityDocumentsWriterThreadPool(int maxNumPerThreads) { + super(maxNumPerThreads); + assert getMaxThreadStates() >= 1; + } + + @Override + public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc) { + ThreadState threadState = threadBindings.get(requestingThread); + if (threadState != null) { + if (threadState.tryLock()) { + return threadState; + } + } + ThreadState minThreadState = null; + + + /* TODO -- another thread could lock the minThreadState we just got while + we should somehow prevent this. */ + // Find the state that has minimum number of threads waiting + minThreadState = minContendedThreadState(); + if (minThreadState == null || minThreadState.hasQueuedThreads()) { + final ThreadState newState = newThreadState(); // state is already locked if non-null + if (newState != null) { + assert newState.isHeldByCurrentThread(); + threadBindings.put(requestingThread, newState); + return newState; + } else if (minThreadState == null) { + /* + * no new threadState available we just take the minContented one + * This must return a valid thread state since we accessed the + * synced context in newThreadState() above. + */ + minThreadState = minContendedThreadState(); + } + } + assert minThreadState != null: "ThreadState is null"; + + minThreadState.lock(); + return minThreadState; + } + + /* + @Override + public void clearThreadBindings(ThreadState perThread) { + threadBindings.clear(); + } + + @Override + public void clearAllThreadBindings() { + threadBindings.clear(); + } + */ +} Property changes on: lucene\src\java\org\apache\lucene\index\ThreadAffinityDocumentsWriterThreadPool.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (revision 0) @@ -0,0 +1,394 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Queue; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; +import org.apache.lucene.util.ThreadInterruptedException; + +/** + * This class controls {@link DocumentsWriterPerThread} flushing during + * indexing. It tracks the memory consumption per + * {@link DocumentsWriterPerThread} and uses a configured {@link FlushPolicy} to + * decide if a {@link DocumentsWriterPerThread} must flush. + *
<p>
+ * In addition to the {@link FlushPolicy} the flush control might set certain + * {@link DocumentsWriterPerThread} as flush pending iff a + * {@link DocumentsWriterPerThread} exceeds the + * {@link IndexWriterConfig#getRAMPerThreadHardLimitMB()} to prevent address + * space exhaustion. + */ +public final class DocumentsWriterFlushControl { + + private final long hardMaxBytesPerDWPT; + private long activeBytes = 0; + private long flushBytes = 0; + private volatile int numPending = 0; + private volatile int numFlushing = 0; + final AtomicBoolean flushDeletes = new AtomicBoolean(false); + private boolean fullFlush = false; + private Queue flushQueue = new LinkedList(); + // only for safety reasons if a DWPT is close to the RAM limit + private Queue blockedFlushes = new LinkedList(); + + + long peakActiveBytes = 0;// only with assert + long peakFlushBytes = 0;// only with assert + long peakNetBytes = 0;// only with assert + private final Healthiness healthiness; + private final DocumentsWriterPerThreadPool perThreadPool; + private final FlushPolicy flushPolicy; + private boolean closed = false; + private final HashMap flushingWriters = new HashMap(); + private final DocumentsWriter documentsWriter; + + DocumentsWriterFlushControl(DocumentsWriter documentsWriter, + Healthiness healthiness, long hardMaxBytesPerDWPT) { + this.healthiness = healthiness; + this.perThreadPool = documentsWriter.perThreadPool; + this.flushPolicy = documentsWriter.flushPolicy; + this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT; + this.documentsWriter = documentsWriter; + } + + public synchronized long activeBytes() { + return activeBytes; + } + + public synchronized long flushBytes() { + return flushBytes; + } + + public synchronized long netBytes() { + return flushBytes + activeBytes; + } + + private void commitPerThreadBytes(ThreadState perThread) { + final long delta = perThread.perThread.bytesUsed() + - perThread.bytesUsed; + perThread.bytesUsed += delta; + /* + * We need to differentiate here if we are pending since setFlushPending + * moves the perThread memory to the flushBytes and we could be set to + * pending during a delete + */ + if (perThread.flushPending) { + flushBytes += delta; + } else { + activeBytes += delta; + } + assert updatePeaks(delta); + } + + // only for asserts + private boolean updatePeaks(long delta) { + peakActiveBytes = Math.max(peakActiveBytes, activeBytes); + peakFlushBytes = Math.max(peakFlushBytes, flushBytes); + peakNetBytes = Math.max(peakNetBytes, netBytes()); + return true; + } + + synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread, + boolean isUpdate) { + commitPerThreadBytes(perThread); + if (!perThread.flushPending) { + if (isUpdate) { + flushPolicy.onUpdate(this, perThread); + } else { + flushPolicy.onInsert(this, perThread); + } + if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) { + // Safety check to prevent a single DWPT exceeding its RAM limit. 
This + // is super important since we can not address more than 2048 MB per DWPT + setFlushPending(perThread); + if (fullFlush) { + DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread, false); + assert toBlock != null; + blockedFlushes.add(toBlock); + } + } + } + final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread, false); + healthiness.updateStalled(this); + return flushingDWPT; + } + + synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) { + assert flushingWriters.containsKey(dwpt); + try { + numFlushing--; + Long bytes = flushingWriters.remove(dwpt); + flushBytes -= bytes.longValue(); + perThreadPool.recycle(dwpt); + healthiness.updateStalled(this); + } finally { + notifyAll(); + } + } + + public synchronized boolean anyFlushing() { + return numFlushing != 0; + } + + public synchronized void waitForFlush() { + if (numFlushing != 0) { + try { + this.wait(); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } + } + } + + /** + * Sets flush pending state on the given {@link ThreadState}. The + * {@link ThreadState} must have indexed at least on Document and must not be + * already pending. + */ + public synchronized void setFlushPending(ThreadState perThread) { + assert !perThread.flushPending; + if (perThread.perThread.getNumDocsInRAM() > 0) { + perThread.flushPending = true; // write access synced + final long bytes = perThread.bytesUsed; + flushBytes += bytes; + activeBytes -= bytes; + numPending++; // write access synced + } // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing + + } + + synchronized void doOnAbort(ThreadState state) { + if (state.flushPending) { + flushBytes -= state.bytesUsed; + } else { + activeBytes -= state.bytesUsed; + } + // Take it out of the loop this DWPT is stale + perThreadPool.replaceForFlush(state, closed); + healthiness.updateStalled(this); + } + + synchronized DocumentsWriterPerThread tryCheckoutForFlush( + ThreadState perThread, boolean setPending) { + if (fullFlush) { + return null; + } + return internalTryCheckOutForFlush(perThread, setPending); + } + + private DocumentsWriterPerThread internalTryCheckOutForFlush( + ThreadState perThread, boolean setPending) { + if (setPending && !perThread.flushPending) { + setFlushPending(perThread); + } + if (perThread.flushPending) { + // We are pending so all memory is already moved to flushBytes + if (perThread.tryLock()) { + try { + if (perThread.isActive()) { + assert perThread.isHeldByCurrentThread(); + final DocumentsWriterPerThread dwpt; + final long bytes = perThread.bytesUsed; // do that before + // replace! 
+ dwpt = perThreadPool.replaceForFlush(perThread, closed); + assert !flushingWriters.containsKey(dwpt) : "DWPT is already flushing"; + // Record the flushing DWPT to reduce flushBytes in doAfterFlush + flushingWriters.put(dwpt, Long.valueOf(bytes)); + numPending--; // write access synced + numFlushing++; + return dwpt; + } + } finally { + perThread.unlock(); + } + } + } + return null; + } + + @Override + public String toString() { + return "DocumentsWriterFlushControl [activeBytes=" + activeBytes + + ", flushBytes=" + flushBytes + "]"; + } + + DocumentsWriterPerThread nextPendingFlush() { + synchronized (this) { + DocumentsWriterPerThread poll = flushQueue.poll(); + if (poll != null) { + return poll; + } + } + if (numPending > 0) { + final Iterator allActiveThreads = perThreadPool + .getActivePerThreadsIterator(); + while (allActiveThreads.hasNext() && numPending > 0) { + ThreadState next = allActiveThreads.next(); + if (next.flushPending) { + final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next, false); + if (dwpt != null) { + return dwpt; + } + } + } + } + return null; + } + + synchronized void setClosed() { + // set by DW to signal that we should not release new DWPT after close + this.closed = true; + } + + /** + * Returns an iterator that provides access to all currently active {@link ThreadState}s + */ + public Iterator allActiveThreads() { + return perThreadPool.getActivePerThreadsIterator(); + } + + synchronized void doOnDelete() { + // pass null this is a global delete no update + flushPolicy.onDelete(this, null); + } + + /** + * Returns the number of delete terms in the global pool + */ + public int getNumGlobalTermDeletes() { + return documentsWriter.deleteQueue.numGlobalTermDeletes(); + } + + int numFlushingDWPT() { + return numFlushing; + } + + public boolean doApplyAllDeletes() { + return flushDeletes.getAndSet(false); + } + + public void setApplyAllDeletes() { + flushDeletes.set(true); + } + + int numActiveDWPT() { + return this.perThreadPool.getMaxThreadStates(); + } + + void markForFullFlush() { + final DocumentsWriterDeleteQueue flushingQueue; + synchronized (this) { + assert !fullFlush; + fullFlush = true; + flushingQueue = documentsWriter.deleteQueue; + // Set a new delete queue - all subsequent DWPT will use this queue until + // we do another full flush + DocumentsWriterDeleteQueue newQueue = new DocumentsWriterDeleteQueue(flushingQueue.generation+1); + documentsWriter.deleteQueue = newQueue; + } + final Iterator allActiveThreads = perThreadPool + .getActivePerThreadsIterator(); + final ArrayList toFlush = new ArrayList(); + while (allActiveThreads.hasNext()) { + final ThreadState next = allActiveThreads.next(); + next.lock(); + try { + if (!next.isActive()) { + continue; + } + assert next.perThread.deleteQueue == flushingQueue + || next.perThread.deleteQueue == documentsWriter.deleteQueue : " flushingQueue: " + + flushingQueue + + " currentqueue: " + + documentsWriter.deleteQueue + + " perThread queue: " + + next.perThread.deleteQueue + + " numDocsInRam: " + next.perThread.getNumDocsInRAM(); + if (next.perThread.deleteQueue != flushingQueue) { + // this one is already a new DWPT + continue; + } + if (next.perThread.getNumDocsInRAM() > 0 ) { + final DocumentsWriterPerThread dwpt = next.perThread; // just for assert + final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next, true); + assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents"; + assert dwpt == flushingDWPT : "flushControl 
returned different DWPT"; + toFlush.add(flushingDWPT); + } else { + // get the new delete queue from DW + next.perThread.initialize(); + } + } finally { + next.unlock(); + } + } + synchronized (this) { + assert assertBlockedFlushes(flushingQueue); + flushQueue.addAll(blockedFlushes); + blockedFlushes.clear(); + flushQueue.addAll(toFlush); + } + } + + synchronized void finishFullFlush() { + assert fullFlush; + assert flushQueue.isEmpty(); + try { + if (!blockedFlushes.isEmpty()) { + assert assertBlockedFlushes(documentsWriter.deleteQueue); + flushQueue.addAll(blockedFlushes); + blockedFlushes.clear(); + } + } finally { + fullFlush = false; + } + } + + boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) { + Queue flushes = this.blockedFlushes; + for (DocumentsWriterPerThread documentsWriterPerThread : flushes) { + assert documentsWriterPerThread.deleteQueue == flushingQueue; + } + return true; + } + + synchronized void abortFullFlushes() { + try { + for (DocumentsWriterPerThread dwpt : flushQueue) { + doAfterFlush(dwpt); + } + for (DocumentsWriterPerThread dwpt : blockedFlushes) { + doAfterFlush(dwpt); + } + + } finally { + fullFlush = false; + flushQueue.clear(); + blockedFlushes.clear(); + } + } + + synchronized boolean isFullFlush() { + return fullFlush; + } +} \ No newline at end of file Property changes on: lucene\src\java\org\apache\lucene\index\DocumentsWriterFlushControl.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/FlushPolicy.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FlushPolicy.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/FlushPolicy.java (revision 0) @@ -0,0 +1,131 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.Iterator; + +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.SetOnce; + +/** + * {@link FlushPolicy} controls when segments are flushed from a RAM resident + * internal data-structure to the {@link IndexWriter}s {@link Directory}. + *
<p>
+ * Segments are traditionally flushed by (see the sketch after this list):
+ * <ul>
+ * <li>RAM consumption - configured via
+ * {@link IndexWriterConfig#setRAMBufferSizeMB(double)}</li>
+ * <li>Number of RAM resident documents - configured via
+ * {@link IndexWriterConfig#setMaxBufferedDocs(int)}</li>
+ * <li>Number of buffered delete terms/queries - configured via
+ * {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)}</li>
+ * </ul>
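The three triggers above map directly onto IndexWriterConfig setters. The fragment below is an illustrative sketch only, not part of this patch; dir and analyzer stand for any existing Directory and Analyzer, and the threshold values are arbitrary examples.

    // Illustrative only: configuring the traditional flush triggers.
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    conf.setRAMBufferSizeMB(32.0);                                  // trigger: RAM consumption
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // trigger: buffered doc count (disabled here)
    conf.setMaxBufferedDeleteTerms(1000);                           // trigger: buffered delete terms
    IndexWriter writer = new IndexWriter(dir, conf);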
+ * + * The {@link IndexWriter} consults a provided {@link FlushPolicy} to control the + * flushing process. The policy is informed for each added or + * updated document as well as for each delete term. Based on the + * {@link FlushPolicy}, the information provided via {@link ThreadState} and + * {@link DocumentsWriterFlushControl}, the {@link FlushPolicy} decides if a + * {@link DocumentsWriterPerThread} needs flushing and mark it as + * flush-pending via + * {@link DocumentsWriterFlushControl#setFlushPending(ThreadState)}. + * + * @see ThreadState + * @see DocumentsWriterFlushControl + * @see DocumentsWriterPerThread + * @see IndexWriterConfig#setFlushPolicy(FlushPolicy) + */ +public abstract class FlushPolicy { + protected final SetOnce writer = new SetOnce(); + protected IndexWriterConfig indexWriterConfig; + + /** + * Called for each delete term. If this is a delete triggered due to an update + * the given {@link ThreadState} is non-null. + *
<p>
+ * Note: This method is called synchronized on the given + * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling + * thread holds the lock on the given {@link ThreadState} + */ + public abstract void onDelete(DocumentsWriterFlushControl control, + ThreadState state); + + /** + * Called for each document update on the given {@link ThreadState}'s + * {@link DocumentsWriterPerThread}. + *
<p>
+ * Note: This method is called synchronized on the given + * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling + * thread holds the lock on the given {@link ThreadState} + */ + public void onUpdate(DocumentsWriterFlushControl control, ThreadState state) { + onInsert(control, state); + if (!state.flushPending) { + onDelete(control, state); + } + } + + /** + * Called for each document addition on the given {@link ThreadState}s + * {@link DocumentsWriterPerThread}. + *
<p>
+ * Note: This method is synchronized by the given + * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling + * thread holds the lock on the given {@link ThreadState} + */ + public abstract void onInsert(DocumentsWriterFlushControl control, + ThreadState state); + + /** + * Called by {@link DocumentsWriter} to initialize the FlushPolicy + */ + protected synchronized void init(DocumentsWriter docsWriter) { + writer.set(docsWriter); + indexWriterConfig = docsWriter.indexWriter.getConfig(); + } + + /** + * Returns the current most RAM consuming non-pending {@link ThreadState} with + * at least one indexed document. + *
<p>
+ * This method will never return null + */ + protected ThreadState findLargestNonPendingWriter( + DocumentsWriterFlushControl control, ThreadState perThreadState) { + assert perThreadState.perThread.getNumDocsInRAM() > 0; + long maxRamSoFar = perThreadState.bytesUsed; + // the dwpt which needs to be flushed eventually + ThreadState maxRamUsingThreadState = perThreadState; + assert !perThreadState.flushPending : "DWPT should have flushed"; + Iterator activePerThreadsIterator = control.allActiveThreads(); + while (activePerThreadsIterator.hasNext()) { + ThreadState next = activePerThreadsIterator.next(); + if (!next.flushPending) { + final long nextRam = next.bytesUsed; + if (nextRam > maxRamSoFar && next.perThread.getNumDocsInRAM() > 0) { + maxRamSoFar = nextRam; + maxRamUsingThreadState = next; + } + } + } + assert writer.get().message( + "set largest ram consuming thread pending on lower watermark"); + return maxRamUsingThreadState; + } + +} Property changes on: lucene\src\java\org\apache\lucene\index\FlushPolicy.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/Healthiness.java =================================================================== --- lucene/src/java/org/apache/lucene/index/Healthiness.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/Healthiness.java (revision 0) @@ -0,0 +1,121 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.concurrent.locks.AbstractQueuedSynchronizer; + +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; + +/** + * Controls the health status of a {@link DocumentsWriter} sessions. This class + * used to block incoming indexing threads if flushing significantly slower than + * indexing to ensure the {@link DocumentsWriter}s healthiness. If flushing is + * significantly slower than indexing the net memory used within an + * {@link IndexWriter} session can increase very quickly and easily exceed the + * JVM's available memory. + *
<p>
+ * To prevent OOM Errors and ensure IndexWriter's stability this class blocks + * incoming threads from indexing once 2 x number of available + * {@link ThreadState}s in {@link DocumentsWriterPerThreadPool} is exceeded. + * Once flushing catches up and the number of flushing DWPT is equal or lower + * than the number of active {@link ThreadState}s threads are released and can + * continue indexing. + */ +//TODO: rename this to DocumentsWriterStallControl (or something like that)? +final class Healthiness { + + @SuppressWarnings("serial") + private static final class Sync extends AbstractQueuedSynchronizer { + volatile boolean hasBlockedThreads = false; // only with assert + + Sync() { + setState(0); + } + + boolean isHealthy() { + return getState() == 0; + } + + boolean trySetStalled() { + int state = getState(); + return compareAndSetState(state, state + 1); + } + + boolean tryReset() { + final int oldState = getState(); + if (oldState == 0) + return true; + if (compareAndSetState(oldState, 0)) { + releaseShared(0); + return true; + } + return false; + } + + @Override + public int tryAcquireShared(int acquires) { + assert maybeSetHasBlocked(getState()); + return getState() == 0 ? 1 : -1; + } + + // only used for testing + private boolean maybeSetHasBlocked(int state) { + hasBlockedThreads |= getState() != 0; + return true; + } + + @Override + public boolean tryReleaseShared(int newState) { + return (getState() == 0); + } + } + + private final Sync sync = new Sync(); + volatile boolean wasStalled = false; // only with asserts + + boolean anyStalledThreads() { + return !sync.isHealthy(); + } + + /** + * Update the stalled flag status. This method will set the stalled flag to + * true iff the number of flushing + * {@link DocumentsWriterPerThread} is greater than the number of active + * {@link DocumentsWriterPerThread}. Otherwise it will reset the + * {@link Healthiness} to healthy and release all threads waiting on + * {@link #waitIfStalled()} + */ + void updateStalled(DocumentsWriterFlushControl flushControl) { + do { + // if we have more flushing DWPT than numActiveDWPT we stall! + while (flushControl.numActiveDWPT() < flushControl.numFlushingDWPT()) { + if (sync.trySetStalled()) { + assert wasStalled = true; + return; + } + } + } while (!sync.tryReset()); + } + + void waitIfStalled() { + sync.acquireShared(0); + } + + boolean hasBlocked() { + return sync.hasBlockedThreads; + } +} \ No newline at end of file Property changes on: lucene\src\java\org\apache\lucene\index\Healthiness.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (working copy) @@ -52,9 +52,15 @@ final int[] queryLimits; final int bytesUsed; final int numTermDeletes; - final long gen; + private long gen = -1; // assigned by BufferedDeletesStream once pushed + final boolean isSegmentPrivate; // set to true iff this frozen packet represents + // a segment private deletes. 
in that case is should + // only have Queries - public FrozenBufferedDeletes(BufferedDeletes deletes, long gen) { + + public FrozenBufferedDeletes(BufferedDeletes deletes, boolean isSegmentPrivate) { + this.isSegmentPrivate = isSegmentPrivate; + assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries"; terms = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]); queries = new Query[deletes.queries.size()]; queryLimits = new int[deletes.queries.size()]; @@ -66,8 +72,17 @@ } bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY; numTermDeletes = deletes.numTermDeletes.get(); + } + + public void setDelGen(long gen) { + assert this.gen == -1; this.gen = gen; } + + public long delGen() { + assert gen != -1; + return gen; + } public Iterable termsIterable() { return new Iterable() { Index: lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java (working copy) @@ -1,34 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -abstract class DocConsumerPerThread { - - /** Process the document. If there is - * something for this document to be done in docID order, - * you should encapsulate that as a - * DocumentsWriter.DocWriter and return it. - * DocumentsWriter then calls finish() on this object - * when it's its turn. */ - abstract DocumentsWriter.DocWriter processDocument(FieldInfos fieldInfos) throws IOException; - - abstract void doAfterFlush(); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java (revision 0) @@ -0,0 +1,259 @@ +package org.apache.lucene.index; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Iterator; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; +import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.util.SetOnce; + +/** + * {@link DocumentsWriterPerThreadPool} controls {@link ThreadState} instances + * and their thread assignments during indexing. Each {@link ThreadState} holds + * a reference to a {@link DocumentsWriterPerThread} that is once a + * {@link ThreadState} is obtained from the pool exclusively used for indexing a + * single document by the obtaining thread. Each indexing thread must obtain + * such a {@link ThreadState} to make progress. Depending on the + * {@link DocumentsWriterPerThreadPool} implementation {@link ThreadState} + * assignments might differ from document to document. + *
<p>
+ * Once a {@link DocumentsWriterPerThread} is selected for flush the thread pool + * is reusing the flushing {@link DocumentsWriterPerThread}s ThreadState with a + * new {@link DocumentsWriterPerThread} instance. + *
<p>
+ */ +public abstract class DocumentsWriterPerThreadPool { + + /** + * {@link ThreadState} references and guards a + * {@link DocumentsWriterPerThread} instance that is used during indexing to + * build a in-memory index segment. {@link ThreadState} also holds all flush + * related per-thread data controlled by {@link DocumentsWriterFlushControl}. + *
<p>
+ * A {@link ThreadState}, its methods and members should only accessed by one + * thread a time. Users must acquire the lock via {@link ThreadState#lock()} + * and release the lock in a finally block via {@link ThreadState#unlock()} + * before accessing the state. + */ + @SuppressWarnings("serial") + public final static class ThreadState extends ReentrantLock { + // package private for FlushPolicy + DocumentsWriterPerThread perThread; + // write access guarded by DocumentsWriterFlushControl + volatile boolean flushPending = false; + // write access guarded by DocumentsWriterFlushControl + long bytesUsed = 0; + // guarded by Reentrant lock + private boolean isActive = true; + + ThreadState(DocumentsWriterPerThread perThread) { + this.perThread = perThread; + } + + /** + * Resets the internal {@link DocumentsWriterPerThread} with the given one. + * if the given DWPT is null this ThreadState is marked as inactive and should not be used + * for indexing anymore. + * @see #isActive() + */ + void resetWriter(DocumentsWriterPerThread perThread) { + assert this.isHeldByCurrentThread(); + if (perThread == null) { + isActive = false; + } + this.perThread = perThread; + this.bytesUsed = 0; + this.flushPending = false; + } + + /** + * Returns true if this ThreadState is still open. This will + * only return false iff the DW has been closed and this + * ThreadState is already checked out for flush. + */ + boolean isActive() { + assert this.isHeldByCurrentThread(); + return isActive; + } + + /** + * Returns the number of currently active bytes in this ThreadState's + * {@link DocumentsWriterPerThread} + */ + public long getBytesUsedPerThread() { + assert this.isHeldByCurrentThread(); + // public for FlushPolicy + return bytesUsed; + } + + /** + * Returns this {@link ThreadState}s {@link DocumentsWriterPerThread} + */ + public DocumentsWriterPerThread getDocumentsWriterPerThread() { + assert this.isHeldByCurrentThread(); + // public for FlushPolicy + return perThread; + } + + /** + * Returns true iff this {@link ThreadState} is marked as flush + * pending otherwise false + */ + public boolean isFlushPending() { + return flushPending; + } + } + + private final ThreadState[] perThreads; + private volatile int numThreadStatesActive; + private CodecProvider codecProvider; + private FieldNumberBiMap globalFieldMap; + private final SetOnce documentsWriter = new SetOnce(); + + public DocumentsWriterPerThreadPool(int maxNumPerThreads) { + maxNumPerThreads = (maxNumPerThreads < 1) ? 
IndexWriterConfig.DEFAULT_MAX_THREAD_STATES : maxNumPerThreads; + perThreads = new ThreadState[maxNumPerThreads]; + numThreadStatesActive = 0; + } + + public void initialize(DocumentsWriter documentsWriter, FieldNumberBiMap globalFieldMap, IndexWriterConfig config) { + this.documentsWriter.set(documentsWriter); // thread pool is bound to DW + final CodecProvider codecs = config.getCodecProvider(); + this.codecProvider = codecs; + this.globalFieldMap = globalFieldMap; + for (int i = 0; i < perThreads.length; i++) { + final FieldInfos infos = globalFieldMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)); + perThreads[i] = new ThreadState(new DocumentsWriterPerThread(documentsWriter.directory, documentsWriter, infos, documentsWriter.chain)); + } + } + + /** + * Returns the max number of {@link ThreadState} instances available in this + * {@link DocumentsWriterPerThreadPool} + */ + public int getMaxThreadStates() { + return perThreads.length; + } + + /** + * Returns a new {@link ThreadState} iff any new state is available otherwise + * null. + *
<p>
+ * NOTE: the returned {@link ThreadState} is already locked iff non- + * null. + * + * @return a new {@link ThreadState} iff any new state is available otherwise + * null + */ + public synchronized ThreadState newThreadState() { + if (numThreadStatesActive < perThreads.length) { + final ThreadState threadState = perThreads[numThreadStatesActive]; + threadState.lock(); // lock so nobody else will get this ThreadState + numThreadStatesActive++; // increment will publish the ThreadState + threadState.perThread.initialize(); + return threadState; + } + return null; + } + + protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) { + assert threadState.isHeldByCurrentThread(); + final DocumentsWriterPerThread dwpt = threadState.perThread; + if (!closed) { + final FieldInfos infos = globalFieldMap.newFieldInfos(SegmentCodecsBuilder.create(codecProvider)); + final DocumentsWriterPerThread newDwpt = new DocumentsWriterPerThread(dwpt, infos); + newDwpt.initialize(); + threadState.resetWriter(newDwpt); + } else { + threadState.resetWriter(null); + } + return dwpt; + } + + public void recycle(DocumentsWriterPerThread dwpt) { + // don't recycle DWPT by default + } + + public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc); + + //public abstract void clearThreadBindings(ThreadState perThread); + + //public abstract void clearAllThreadBindings(); + + /** + * Returns an iterator providing access to all {@link ThreadState} + * instances. + */ + // TODO: new Iterator per indexed doc is overkill...? + public Iterator getAllPerThreadsIterator() { + return getPerThreadsIterator(this.perThreads.length); + } + + /** + * Returns an iterator providing access to all active {@link ThreadState} + * instances. + *
<p>
+ * Note: The returned iterator will only iterator + * {@link ThreadState}s that are active at the point in time when this method + * has been called. + * + */ + // TODO: new Iterator per indexed doc is overkill...? + public Iterator getActivePerThreadsIterator() { + return getPerThreadsIterator(numThreadStatesActive); + } + + private Iterator getPerThreadsIterator(final int upto) { + return new Iterator() { + int i = 0; + + public boolean hasNext() { + return i < upto; + } + + public ThreadState next() { + return perThreads[i++]; + } + + public void remove() { + throw new UnsupportedOperationException("remove() not supported."); + } + }; + } + + /** + * Returns the ThreadState with the minimum estimated number of threads + * waiting to acquire its lock or null if no {@link ThreadState} + * is yet visible to the calling thread. + */ + protected ThreadState minContendedThreadState() { + ThreadState minThreadState = null; + // TODO: new Iterator per indexed doc is overkill...? + final Iterator it = getActivePerThreadsIterator(); + while (it.hasNext()) { + final ThreadState state = it.next(); + if (minThreadState == null || state.getQueueLength() < minThreadState.getQueueLength()) { + minThreadState = state; + } + } + return minThreadState; + } +} Property changes on: lucene\src\java\org\apache\lucene\index\DocumentsWriterPerThreadPool.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/BufferedDeletes.java =================================================================== --- lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (working copy) @@ -72,13 +72,18 @@ public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE); - final AtomicLong bytesUsed = new AtomicLong(); + final AtomicLong bytesUsed; private final static boolean VERBOSE_DELETES = false; long gen; + public BufferedDeletes(boolean sortTerms) { + this(sortTerms, new AtomicLong()); + } - public BufferedDeletes(boolean sortTerms) { + BufferedDeletes(boolean sortTerms, AtomicLong bytesUsed) { + assert bytesUsed != null; + this.bytesUsed = bytesUsed; if (sortTerms) { terms = new TreeMap(); } else { Index: lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java (working copy) @@ -1,115 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import java.util.Comparator; - -import org.apache.lucene.util.ByteBlockPool; -import org.apache.lucene.util.BytesRef; - -import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray; - -// TODO FI: some of this is "generic" to TermsHash* so we -// should factor it out so other consumers don't have to -// duplicate this code - -/** Used by DocumentsWriter to merge the postings from - * multiple ThreadStates when creating a segment */ -final class FreqProxFieldMergeState { - - final FreqProxTermsWriterPerField field; - final int numPostings; - private final ByteBlockPool bytePool; - final int[] termIDs; - final FreqProxPostingsArray postings; - int currentTermID; - - final BytesRef text = new BytesRef(); - - private int postingUpto = -1; - - final ByteSliceReader freq = new ByteSliceReader(); - final ByteSliceReader prox = new ByteSliceReader(); - - int docID; - int termFreq; - - public FreqProxFieldMergeState(FreqProxTermsWriterPerField field, Comparator termComp) { - this.field = field; - this.numPostings = field.termsHashPerField.bytesHash.size(); - this.bytePool = field.perThread.termsHashPerThread.bytePool; - this.termIDs = field.termsHashPerField.sortPostings(termComp); - this.postings = (FreqProxPostingsArray) field.termsHashPerField.postingsArray; - } - - boolean nextTerm() throws IOException { - postingUpto++; - if (postingUpto == numPostings) { - return false; - } - - currentTermID = termIDs[postingUpto]; - docID = 0; - - // Get BytesRef - final int textStart = postings.textStarts[currentTermID]; - bytePool.setBytesRef(text, textStart); - - field.termsHashPerField.initReader(freq, currentTermID, 0); - if (!field.fieldInfo.omitTermFreqAndPositions) { - field.termsHashPerField.initReader(prox, currentTermID, 1); - } - - // Should always be true - boolean result = nextDoc(); - assert result; - - return true; - } - - public boolean nextDoc() throws IOException { - if (freq.eof()) { - if (postings.lastDocCodes[currentTermID] != -1) { - // Return last doc - docID = postings.lastDocIDs[currentTermID]; - if (!field.omitTermFreqAndPositions) - termFreq = postings.docFreqs[currentTermID]; - postings.lastDocCodes[currentTermID] = -1; - return true; - } else - // EOF - return false; - } - - final int code = freq.readVInt(); - if (field.omitTermFreqAndPositions) - docID += code; - else { - docID += code >>> 1; - if ((code & 1) != 0) - termFreq = 1; - else - termFreq = freq.readVInt(); - } - - assert docID != postings.lastDocIDs[currentTermID]; - - return true; - } -} Index: lucene/src/java/org/apache/lucene/index/TermsHash.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHash.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/TermsHash.java (working copy) @@ -18,12 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; import java.util.Map; +import org.apache.lucene.util.ByteBlockPool; +import org.apache.lucene.util.BytesRef; + /** This class implements {@link InvertedDocConsumer}, which * is passed each token produced by the analyzer on each * field. 
It stores these tokens in a hash table, and @@ -36,78 +36,118 @@ final TermsHashConsumer consumer; final TermsHash nextTermsHash; - final DocumentsWriter docWriter; + final DocumentsWriterPerThread docWriter; - boolean trackAllocations; + final IntBlockPool intPool; + final ByteBlockPool bytePool; + ByteBlockPool termBytePool; - public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) { + final boolean primary; + final DocumentsWriterPerThread.DocState docState; + + // Used when comparing postings via termRefComp, in TermsHashPerField + final BytesRef tr1 = new BytesRef(); + final BytesRef tr2 = new BytesRef(); + + // Used by perField to obtain terms from the analysis chain + final BytesRef termBytesRef = new BytesRef(10); + + final boolean trackAllocations; + + + public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) { + this.docState = docWriter.docState; this.docWriter = docWriter; this.consumer = consumer; + this.trackAllocations = trackAllocations; this.nextTermsHash = nextTermsHash; - this.trackAllocations = trackAllocations; + intPool = new IntBlockPool(docWriter); + bytePool = new ByteBlockPool(docWriter.byteBlockAllocator); + + if (nextTermsHash != null) { + // We are primary + primary = true; + termBytePool = bytePool; + nextTermsHash.termBytePool = bytePool; + } else { + primary = false; + } } @Override - InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { - return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null); + public void abort() { + reset(); + try { + consumer.abort(); + } finally { + if (nextTermsHash != null) { + nextTermsHash.abort(); + } + } } - TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) { - return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread); - } + // Clear all state + void reset() { + intPool.reset(); + bytePool.reset(); - @Override - public void abort() { - consumer.abort(); - if (nextTermsHash != null) - nextTermsHash.abort(); + if (primary) { + bytePool.reset(); + } } @Override - synchronized void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - Map> nextThreadsAndFields; + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { + Map childFields = new HashMap(); + Map nextChildFields; - if (nextTermsHash != null) - nextThreadsAndFields = new HashMap>(); - else - nextThreadsAndFields = null; + if (nextTermsHash != null) { + nextChildFields = new HashMap(); + } else { + nextChildFields = null; + } - for (final Map.Entry> entry : threadsAndFields.entrySet()) { + for (final Map.Entry entry : fieldsToFlush.entrySet()) { + TermsHashPerField perField = (TermsHashPerField) entry.getValue(); + childFields.put(entry.getKey(), perField.consumer); + if (nextTermsHash != null) { + nextChildFields.put(entry.getKey(), perField.nextPerField); + } + } - TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey(); + consumer.flush(childFields, state); - Collection fields = entry.getValue(); + if (nextTermsHash != null) { + nextTermsHash.flush(nextChildFields, state); + } + } - Iterator fieldsIt = fields.iterator(); - Collection childFields = new HashSet(); - Collection nextChildFields; + @Override + InvertedDocConsumerPerField 
addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { + return new TermsHashPerField(docInverterPerField, this, nextTermsHash, fieldInfo); + } - if (nextTermsHash != null) - nextChildFields = new HashSet(); - else - nextChildFields = null; + @Override + public boolean freeRAM() { + return false; + } - while(fieldsIt.hasNext()) { - TermsHashPerField perField = (TermsHashPerField) fieldsIt.next(); - childFields.add(perField.consumer); - if (nextTermsHash != null) - nextChildFields.add(perField.nextPerField); + @Override + void finishDocument() throws IOException { + try { + consumer.finishDocument(this); + } finally { + if (nextTermsHash != null) { + nextTermsHash.consumer.finishDocument(nextTermsHash); } - - childThreadsAndFields.put(perThread.consumer, childFields); - if (nextTermsHash != null) - nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields); } - - consumer.flush(childThreadsAndFields, state); - - if (nextTermsHash != null) - nextTermsHash.flush(nextThreadsAndFields, state); } @Override - synchronized public boolean freeRAM() { - return false; + void startDocument() throws IOException { + consumer.startDocument(); + if (nextTermsHash != null) { + nextTermsHash.consumer.startDocument(); + } } } Index: lucene/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -39,24 +39,24 @@ /** * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, - * into a single Segment. After adding the appropriate readers, call the merge method to combine the + * into a single Segment. After adding the appropriate readers, call the merge method to combine the * segments. 
- * + * * @see #merge * @see #add */ final class SegmentMerger { - + /** norms header placeholder */ - static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; - + static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; + private Directory directory; private String segment; private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; private List readers = new ArrayList(); private final FieldInfos fieldInfos; - + private int mergedDocs; private final MergeState.CheckAbort checkAbort; @@ -64,13 +64,13 @@ /** Maximum number of contiguous documents to bulk-copy when merging stored fields */ private final static int MAX_RAW_MERGE_DOCS = 4192; - + private final CodecProvider codecs; private Codec codec; private SegmentWriteState segmentWriteState; private PayloadProcessorProvider payloadProcessorProvider; - + SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) { this.payloadProcessorProvider = payloadProcessorProvider; directory = dir; @@ -135,10 +135,10 @@ for (String file : files) { cfsWriter.addFile(file); } - + // Perform the merge cfsWriter.close(); - + return files; } @@ -196,13 +196,12 @@ } /** - * + * * @return The number of documents in all of the readers * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ private int mergeFields() throws CorruptIndexException, IOException { - for (IndexReader reader : readers) { if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; @@ -265,7 +264,7 @@ throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption"); segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null); - + return docCount; } @@ -283,7 +282,7 @@ ++j; continue; } - // We can optimize this case (doing a bulk byte copy) since the field + // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { @@ -295,7 +294,7 @@ break; } } while(numDocs < MAX_RAW_MERGE_DOCS); - + IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs); fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; @@ -349,7 +348,7 @@ * @throws IOException */ private final void mergeVectors() throws IOException { - TermVectorsWriter termVectorsWriter = + TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos); try { @@ -369,7 +368,7 @@ copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader); } else { copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader); - + } } } finally { @@ -402,7 +401,7 @@ ++docNum; continue; } - // We can optimize this case (doing a bulk byte copy) since the field + // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = docNum, numDocs = 0; do { @@ -414,7 +413,7 @@ break; } } while(numDocs < MAX_RAW_MERGE_DOCS); - + matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); checkAbort.work(300 * numDocs); @@ 
-425,7 +424,7 @@ // skip deleted docs continue; } - + // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); @@ -434,7 +433,7 @@ } } } - + private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter, final TermVectorsReader matchingVectorsReader, final IndexReader reader) @@ -470,7 +469,7 @@ // Let CodecProvider decide which codec will be used to write // the new segment: - + int docBase = 0; final List fields = new ArrayList(); @@ -498,7 +497,7 @@ mergeState.readerCount = readers.size(); mergeState.fieldInfos = fieldInfos; mergeState.mergedDocCount = mergedDocs; - + // Remap docIDs mergeState.delCounts = new int[mergeState.readerCount]; mergeState.docMaps = new int[mergeState.readerCount][]; @@ -536,7 +535,7 @@ } assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount; } - + if (payloadProcessorProvider != null) { mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory()); } @@ -549,7 +548,7 @@ // apart when we step through the docs enums in // MultiDocsEnum. mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts); - + try { consumer.merge(mergeState, new MultiFields(fields.toArray(Fields.EMPTY_ARRAY), @@ -568,7 +567,7 @@ int[] getDelCounts() { return mergeState.delCounts; } - + public boolean getAnyNonBulkMerges() { assert matchedCount <= readers.size(); return matchedCount != readers.size(); @@ -579,7 +578,7 @@ try { for (FieldInfo fi : fieldInfos) { if (fi.isIndexed && !fi.omitNorms) { - if (output == null) { + if (output == null) { output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION)); output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); } @@ -610,7 +609,7 @@ } } } finally { - if (output != null) { + if (output != null) { output.close(); } } Index: lucene/src/java/org/apache/lucene/index/TermsHashPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (working copy) @@ -34,9 +34,10 @@ final TermsHashConsumerPerField consumer; + final TermsHash termsHash; + final TermsHashPerField nextPerField; - final TermsHashPerThread perThread; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; TermToBytesRefAttribute termAtt; BytesRef termBytesRef; @@ -52,27 +53,27 @@ final FieldInfo fieldInfo; final BytesRefHash bytesHash; - + ParallelPostingsArray postingsArray; private final AtomicLong bytesUsed; - public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) { - this.perThread = perThread; - intPool = perThread.intPool; - bytePool = perThread.bytePool; - termBytePool = perThread.termBytePool; - docState = perThread.docState; - bytesUsed = perThread.termsHash.trackAllocations?perThread.termsHash.docWriter.bytesUsed:new AtomicLong(); - + public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHash termsHash, final TermsHash nextTermsHash, final FieldInfo fieldInfo) { + intPool = termsHash.intPool; + bytePool = termsHash.bytePool; + termBytePool = termsHash.termBytePool; + docState = 
termsHash.docState; + this.termsHash = termsHash; + bytesUsed = termsHash.trackAllocations ? termsHash.docWriter.bytesUsed + : new AtomicLong(); fieldState = docInverterPerField.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); + this.consumer = termsHash.consumer.addField(this, fieldInfo); PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed); - bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); + bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); streamCount = consumer.getStreamCount(); numPostingInt = 2*streamCount; this.fieldInfo = fieldInfo; - if (nextPerThread != null) - nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo); + if (nextTermsHash != null) + nextPerField = (TermsHashPerField) nextTermsHash.addField(docInverterPerField, fieldInfo); else nextPerField = null; } @@ -80,7 +81,7 @@ void shrinkHash(int targetSize) { // Fully free the bytesHash on each flush but keep the pool untouched // bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too - bytesHash.clear(false); + bytesHash.clear(false); } public void reset() { @@ -90,7 +91,7 @@ } @Override - synchronized public void abort() { + public void abort() { reset(); if (nextPerField != null) nextPerField.abort(); @@ -99,14 +100,13 @@ public void initReader(ByteSliceReader reader, int termID, int stream) { assert stream < streamCount; int intStart = postingsArray.intStarts[termID]; - final int[] ints = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - final int upto = intStart & DocumentsWriter.INT_BLOCK_MASK; + final int[] ints = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + final int upto = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; reader.init(bytePool, postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE, ints[upto+stream]); } - /** Collapse the hash table & sort in-place. */ public int[] sortPostings(Comparator termComp) { return bytesHash.sort(termComp); @@ -124,7 +124,7 @@ nextPerField.start(f); } } - + @Override boolean start(Fieldable[] fields, int count) throws IOException { doCall = consumer.start(fields, count); @@ -143,11 +143,10 @@ // First time we are seeing this token since we last // flushed the hash. 
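As an aside, the INT_BLOCK_SHIFT/INT_BLOCK_MASK arithmetic that this hunk redirects from DocumentsWriter to DocumentsWriterPerThread resolves a global int offset to a (buffer, offset-within-buffer) pair. A minimal sketch, assuming the conventional block constants; the constant values and the helper name are assumptions, not taken from this patch.

    // Assumed constants: 8192 ints per buffer (1 << 13).
    static final int INT_BLOCK_SHIFT = 13;
    static final int INT_BLOCK_SIZE  = 1 << INT_BLOCK_SHIFT;
    static final int INT_BLOCK_MASK  = INT_BLOCK_SIZE - 1;

    static int streamStart(int[][] buffers, int intStart, int stream) {
      final int[] ints = buffers[intStart >> INT_BLOCK_SHIFT];  // select the buffer holding intStart
      final int upto   = intStart & INT_BLOCK_MASK;             // offset of intStart within that buffer
      return ints[upto + stream];                               // per-stream write pointer, as in initReader
    }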
// Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) + if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) intPool.nextBuffer(); - if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) - bytePool.nextBuffer(); + if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) bytePool.nextBuffer(); intUptos = intPool.buffer; intUptoStart = intPool.intUpto; @@ -166,8 +165,8 @@ } else { termID = (-termID)-1; int intStart = postingsArray.intStarts[termID]; - intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK; + intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; consumer.addTerm(termID); } } @@ -192,7 +191,7 @@ if (docState.maxTermPrefix == null) { final int saved = termBytesRef.length; try { - termBytesRef.length = Math.min(30, DocumentsWriter.MAX_TERM_LENGTH_UTF8); + termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8); docState.maxTermPrefix = termBytesRef.toString(); } finally { termBytesRef.length = saved; @@ -204,7 +203,7 @@ if (termID >= 0) {// New posting bytesHash.byteStart(termID); // Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) { + if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) { intPool.nextBuffer(); } @@ -229,8 +228,8 @@ } else { termID = (-termID)-1; final int intStart = postingsArray.intStarts[termID]; - intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK; + intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; consumer.addTerm(termID); } @@ -278,7 +277,7 @@ if (nextPerField != null) nextPerField.finish(); } - + private static final class PostingsBytesStartArray extends BytesStartArray { private final TermsHashPerField perField; @@ -289,10 +288,10 @@ this.perField = perField; this.bytesUsed = bytesUsed; } - + @Override public int[] init() { - if(perField.postingsArray == null) { + if(perField.postingsArray == null) { perField.postingsArray = perField.consumer.createPostingsArray(2); bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); } @@ -312,7 +311,7 @@ @Override public int[] clear() { if(perField.postingsArray != null) { - bytesUsed.addAndGet(-perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); + bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting())); perField.postingsArray = null; } return null; Index: lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (working copy) @@ -19,10 +19,15 @@ import java.io.IOException; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; -import java.util.HashMap; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; + /** * This is a DocConsumer that gathers all fields under the * same name, and 
calls per-field consumers to process field @@ -33,26 +38,39 @@ final class DocFieldProcessor extends DocConsumer { - final DocumentsWriter docWriter; final DocFieldConsumer consumer; final StoredFieldsWriter fieldsWriter; - public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) { - this.docWriter = docWriter; + // Holds all fields seen in current doc + DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; + int fieldCount; + + // Hash table for all fields ever seen + DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; + int hashMask = 1; + int totalFieldCount; + + float docBoost; + int fieldGen; + final DocumentsWriterPerThread.DocState docState; + + public DocFieldProcessor(DocumentsWriterPerThread docWriter, DocFieldConsumer consumer) { + this.docState = docWriter.docState; this.consumer = consumer; fieldsWriter = new StoredFieldsWriter(docWriter); } @Override - public void flush(Collection threads, SegmentWriteState state) throws IOException { + public void flush(SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - for ( DocConsumerPerThread thread : threads) { - DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread; - childThreadsAndFields.put(perThread.consumer, perThread.fields()); + Map childFields = new HashMap(); + Collection fields = fields(); + for (DocFieldConsumerPerField f : fields) { + childFields.put(f.getFieldInfo(), f); } + fieldsWriter.flush(state); - consumer.flush(childThreadsAndFields, state); + consumer.flush(childFields, state); // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, @@ -64,8 +82,20 @@ @Override public void abort() { - fieldsWriter.abort(); - consumer.abort(); + for(int i=0;i fields() { + Collection fields = new HashSet(); + for(int i=0;i fieldHash.length; + + final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize]; + + // Rehash + int newHashMask = newHashSize-1; + for(int j=0;j docFields = doc.getFields(); + final int numDocFields = docFields.size(); + + // Absorb any new fields first seen in this document. + // Also absorb any changes to fields we had already + // seen before (eg suddenly turning on norms or + // vectors, etc.): + + for(int i=0;i= fieldHash.length/2) + rehash(); + } else { + fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), + field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), + field.getOmitNorms(), false, field.getOmitTermFreqAndPositions()); + } + + if (thisFieldGen != fp.lastGen) { + + // First time we're seeing this field for this doc + fp.fieldCount = 0; + + if (fieldCount == fields.length) { + final int newSize = fields.length*2; + DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize]; + System.arraycopy(fields, 0, newArray, 0, fieldCount); + fields = newArray; + } + + fields[fieldCount++] = fp; + fp.lastGen = thisFieldGen; + } + + fp.addField(field); + + if (field.isStored()) { + fieldsWriter.addField(field, fp.fieldInfo); + } + } + + // If we are writing vectors then we must visit + // fields in sorted order so they are written in + // sorted order. TODO: we actually only need to + // sort the subset of fields that have vectors + // enabled; we could save [small amount of] CPU + // here. 
+ quickSort(fields, 0, fieldCount-1); + + for(int i=0;i= hi) + return; + else if (hi == 1+lo) { + if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { + final DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[hi]; + array[hi] = tmp; + } + return; + } + + int mid = (lo + hi) >>> 1; + + if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp; + } + + if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp = array[mid]; + array[mid] = array[hi]; + array[hi] = tmp; + + if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp2 = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp2; + } + } + + int left = lo + 1; + int right = hi - 1; + + if (left >= right) + return; + + DocFieldProcessorPerField partition = array[mid]; + + for (; ;) { + while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0) + --right; + + while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0) + ++left; + + if (left < right) { + DocFieldProcessorPerField tmp = array[left]; + array[left] = array[right]; + array[right] = tmp; + --right; + } else { + break; + } + } + + quickSort(array, lo, left); + quickSort(array, left + 1, hi); + } } Index: lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -19,55 +19,35 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; import java.util.List; import java.util.Map; import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.codecs.PostingsConsumer; -import org.apache.lucene.index.codecs.TermStats; -import org.apache.lucene.index.codecs.TermsConsumer; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; final class FreqProxTermsWriter extends TermsHashConsumer { @Override - public TermsHashConsumerPerThread addThread(TermsHashPerThread perThread) { - return new FreqProxTermsWriterPerThread(perThread); - } - - @Override void abort() {} - private int flushedDocCount; - // TODO: would be nice to factor out more of this, eg the // FreqProxFieldMergeState, and code to visit all Fields // under the same FieldInfo together, up into TermsHash*. // Other writers would presumably share alot of this... 
@Override - public void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { + public void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { // Gather all FieldData's that have postings, across all // ThreadStates List allFields = new ArrayList(); - - flushedDocCount = state.numDocs; - for (Map.Entry> entry : threadsAndFields.entrySet()) { - - Collection fields = entry.getValue(); - - - for (final TermsHashConsumerPerField i : fields) { - final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) i; - if (perField.termsHashPerField.bytesHash.size() > 0) + for (TermsHashConsumerPerField f : fieldsToFlush.values()) { + final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f; + if (perField.termsHashPerField.bytesHash.size() > 0) { allFields.add(perField); - } + } } final int numAllFields = allFields.size(); @@ -77,6 +57,8 @@ final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state); + TermsHash termsHash = null; + /* Current writer chain: FieldsConsumer @@ -89,257 +71,48 @@ -> IMPL: FormatPostingsPositionsWriter */ - int start = 0; - while(start < numAllFields) { - final FieldInfo fieldInfo = allFields.get(start).fieldInfo; - final String fieldName = fieldInfo.name; + for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) { + final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo; - int end = start+1; - while(end < numAllFields && allFields.get(end).fieldInfo.name.equals(fieldName)) - end++; - - FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end-start]; - for(int i=start;i> entry : threadsAndFields.entrySet()) { - FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.getKey(); - perThread.termsHashPerThread.reset(true); + if (termsHash != null) { + termsHash.reset(); } consumer.close(); } BytesRef payload; - /* Walk through all unique text tokens (Posting - * instances) found in this field and serialize them - * into a single RAM segment. */ - void appendPostings(String fieldName, SegmentWriteState state, - FreqProxTermsWriterPerField[] fields, - FieldsConsumer consumer) - throws CorruptIndexException, IOException { + @Override + public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { + return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); + } - int numFields = fields.length; + @Override + void finishDocument(TermsHash termsHash) throws IOException { + } - final BytesRef text = new BytesRef(); - - final FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields]; - - final TermsConsumer termsConsumer = consumer.addField(fields[0].fieldInfo); - final Comparator termComp = termsConsumer.getComparator(); - - for(int i=0;i 0; if (omitTermFreqAndPositions) { @@ -169,7 +177,7 @@ } } } - + @Override ParallelPostingsArray createPostingsArray(int size) { return new FreqProxPostingsArray(size); @@ -212,7 +220,180 @@ return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT; } } - + public void abort() {} + + BytesRef payload; + + /* Walk through all unique text tokens (Posting + * instances) found in this field and serialize them + * into a single RAM segment. 
*/ + void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state) + throws CorruptIndexException, IOException { + + final TermsConsumer termsConsumer = consumer.addField(fieldInfo); + final Comparator termComp = termsConsumer.getComparator(); + + final Term protoTerm = new Term(fieldName); + + final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + + final Map segDeletes; + if (state.segDeletes != null && state.segDeletes.terms.size() > 0) { + segDeletes = state.segDeletes.terms; + } else { + segDeletes = null; + } + + final int[] termIDs = termsHashPerField.sortPostings(termComp); + final int numTerms = termsHashPerField.bytesHash.size(); + final BytesRef text = new BytesRef(); + final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray; + final ByteSliceReader freq = new ByteSliceReader(); + final ByteSliceReader prox = new ByteSliceReader(); + + long sumTotalTermFreq = 0; + for (int i = 0; i < numTerms; i++) { + final int termID = termIDs[i]; + // Get BytesRef + final int textStart = postings.textStarts[termID]; + termsHashPerField.bytePool.setBytesRef(text, textStart); + + termsHashPerField.initReader(freq, termID, 0); + if (!fieldInfo.omitTermFreqAndPositions) { + termsHashPerField.initReader(prox, termID, 1); + } + + // TODO: really TermsHashPerField should take over most + // of this loop, including merge sort of terms from + // multiple threads and interacting with the + // TermsConsumer, only calling out to us (passing us the + // DocsConsumer) to handle delivery of docs/positions + + final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text); + + final int delDocLimit; + if (segDeletes != null) { + final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text)); + if (docIDUpto != null) { + delDocLimit = docIDUpto; + } else { + delDocLimit = 0; + } + } else { + delDocLimit = 0; + } + + // Now termStates has numToMerge FieldMergeStates + // which all share the same term. Now we must + // interleave the docID streams. + int numDocs = 0; + long totTF = 0; + int docID = 0; + int termFreq = 0; + + while(true) { + if (freq.eof()) { + if (postings.lastDocCodes[termID] != -1) { + // Return last doc + docID = postings.lastDocIDs[termID]; + if (!omitTermFreqAndPositions) { + termFreq = postings.docFreqs[termID]; + } + postings.lastDocCodes[termID] = -1; + } else { + // EOF + break; + } + } else { + final int code = freq.readVInt(); + if (omitTermFreqAndPositions) { + docID += code; + } else { + docID += code >>> 1; + if ((code & 1) != 0) { + termFreq = 1; + } else { + termFreq = freq.readVInt(); + } + } + + assert docID != postings.lastDocIDs[termID]; + } + + numDocs++; + assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs; + final int termDocFreq = termFreq; + + // NOTE: we could check here if the docID was + // deleted, and skip it. However, this is somewhat + // dangerous because it can yield non-deterministic + // behavior since we may see the docID before we see + // the term that caused it to be deleted. This + // would mean some (but not all) of its postings may + // make it into the index, which'd alter the docFreq + // for those terms. We could fix this by doing two + // passes, ie first sweep marks all del docs, and + // 2nd sweep does the real flush, but I suspect + // that'd add too much time to flush. + postingsConsumer.startDoc(docID, termDocFreq); + if (docID < delDocLimit) { + // Mark it deleted. 
TODO: we could also skip + // writing its postings; this would be + // deterministic (just for this Term's docs). + if (state.deletedDocs == null) { + state.deletedDocs = new BitVector(state.numDocs); + } + state.deletedDocs.set(docID); + } + + // Carefully copy over the prox + payload info, + // changing the format to match Lucene's segment + // format. + if (!currentFieldOmitTermFreqAndPositions) { + // omitTermFreqAndPositions == false so we do write positions & + // payload + int position = 0; + totTF += termDocFreq; + for(int j=0;j> 1; + + final int payloadLength; + final BytesRef thisPayload; + + if ((code & 1) != 0) { + // This position has a payload + payloadLength = prox.readVInt(); + + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[payloadLength]; + } else if (payload.bytes.length < payloadLength) { + payload.grow(payloadLength); + } + + prox.readBytes(payload.bytes, 0, payloadLength); + payload.length = payloadLength; + thisPayload = payload; + + } else { + payloadLength = 0; + thisPayload = null; + } + + postingsConsumer.addPosition(position, thisPayload); + } + + postingsConsumer.finishDoc(); + } + } + termsConsumer.finishTerm(text, new TermStats(numDocs, totTF)); + sumTotalTermFreq += totTF; + } + + termsConsumer.finish(sumTotalTermFreq); + } + } Index: lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java (revision 0) @@ -0,0 +1,128 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; + +/** + * Default {@link FlushPolicy} implementation that flushes based on RAM used, + * document count and number of buffered deletes depending on the IndexWriter's + * {@link IndexWriterConfig}. + * + *

+ * <ul>
+ * <li>{@link #onDelete(DocumentsWriterFlushControl, ThreadState)} - flushes
+ * based on the global number of buffered delete terms iff
+ * {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} is enabled</li>
+ * <li>{@link #onInsert(DocumentsWriterFlushControl, ThreadState)} - flushes
+ * either on the number of documents per {@link DocumentsWriterPerThread} (
+ * {@link DocumentsWriterPerThread#getNumDocsInRAM()}) or on the global active
+ * memory consumption in the current indexing session iff
+ * {@link IndexWriterConfig#getMaxBufferedDocs()} or
+ * {@link IndexWriterConfig#getRAMBufferSizeMB()} is enabled respectively</li>
+ * <li>{@link #onUpdate(DocumentsWriterFlushControl, ThreadState)} - calls
+ * {@link #onInsert(DocumentsWriterFlushControl, ThreadState)} and
+ * {@link #onDelete(DocumentsWriterFlushControl, ThreadState)} in order</li>
+ * </ul>
+ * All {@link IndexWriterConfig} settings are used to mark
+ * {@link DocumentsWriterPerThread} as flush pending during indexing with
+ * respect to their live updates.
+ * <p>
+ * If {@link IndexWriterConfig#setRAMBufferSizeMB(double)} is enabled, the + * largest ram consuming {@link DocumentsWriterPerThread} will be marked as + * pending iff the global active RAM consumption is >= the configured max RAM + * buffer. + */ +public class FlushByRamOrCountsPolicy extends FlushPolicy { + + @Override + public void onDelete(DocumentsWriterFlushControl control, ThreadState state) { + if (flushOnDeleteTerms()) { + // Flush this state by num del terms + final int maxBufferedDeleteTerms = indexWriterConfig + .getMaxBufferedDeleteTerms(); + if (control.getNumGlobalTermDeletes() >= maxBufferedDeleteTerms) { + control.setApplyAllDeletes(); + } + } + final DocumentsWriter writer = this.writer.get(); + // If deletes alone are consuming > 1/2 our RAM + // buffer, force them all to apply now. This is to + // prevent too-frequent flushing of a long tail of + // tiny segments: + if ((flushOnRAM() && + writer.deleteQueue.bytesUsed() > (1024*1024*indexWriterConfig.getRAMBufferSizeMB()/2))) { + control.setApplyAllDeletes(); + if (writer.infoStream != null) { + writer.message("force apply deletes bytesUsed=" + writer.deleteQueue.bytesUsed() + " vs ramBuffer=" + (1024*1024*indexWriterConfig.getRAMBufferSizeMB())); + } + } + } + + @Override + public void onInsert(DocumentsWriterFlushControl control, ThreadState state) { + if (flushOnDocCount() + && state.perThread.getNumDocsInRAM() >= indexWriterConfig + .getMaxBufferedDocs()) { + // Flush this state by num docs + control.setFlushPending(state); + } else if (flushOnRAM()) {// flush by RAM + final long limit = (long) (indexWriterConfig.getRAMBufferSizeMB() * 1024.d * 1024.d); + final long totalRam = control.activeBytes(); + if (totalRam >= limit) { + markLargestWriterPending(control, state, totalRam); + } + } + } + + /** + * Marks the most ram consuming active {@link DocumentsWriterPerThread} flush + * pending + */ + protected void markLargestWriterPending(DocumentsWriterFlushControl control, + ThreadState perThreadState, final long currentBytesPerThread) { + control + .setFlushPending(findLargestNonPendingWriter(control, perThreadState)); + } + + /** + * Returns true if this {@link FlushPolicy} flushes on + * {@link IndexWriterConfig#getMaxBufferedDocs()}, otherwise + * false. + */ + protected boolean flushOnDocCount() { + return indexWriterConfig.getMaxBufferedDocs() != IndexWriterConfig.DISABLE_AUTO_FLUSH; + } + + /** + * Returns true if this {@link FlushPolicy} flushes on + * {@link IndexWriterConfig#getMaxBufferedDeleteTerms()}, otherwise + * false. + */ + protected boolean flushOnDeleteTerms() { + return indexWriterConfig.getMaxBufferedDeleteTerms() != IndexWriterConfig.DISABLE_AUTO_FLUSH; + } + + /** + * Returns true if this {@link FlushPolicy} flushes on + * {@link IndexWriterConfig#getRAMBufferSizeMB()}, otherwise + * false. + */ + protected boolean flushOnRAM() { + return indexWriterConfig.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH; + } +} Property changes on: lucene\src\java\org\apache\lucene\index\FlushByRamOrCountsPolicy.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (working copy) @@ -17,87 +17,62 @@ * limitations under the License. 
*/ +import java.io.IOException; +import java.util.Map; + import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; -import java.io.IOException; -import java.util.Collection; - -import java.util.Map; - final class TermVectorsTermsWriter extends TermsHashConsumer { - final DocumentsWriter docWriter; - PerDoc[] docFreeList = new PerDoc[1]; + final DocumentsWriterPerThread docWriter; int freeCount; IndexOutput tvx; IndexOutput tvd; IndexOutput tvf; int lastDocID; + + final DocumentsWriterPerThread.DocState docState; + final BytesRef flushTerm = new BytesRef(); + + // Used by perField when serializing the term vectors + final ByteSliceReader vectorSliceReader = new ByteSliceReader(); boolean hasVectors; - public TermVectorsTermsWriter(DocumentsWriter docWriter) { + public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; + docState = docWriter.docState; } @Override - public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) { - return new TermVectorsTermsWriterPerThread(termsHashPerThread, this); - } - - @Override - synchronized void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { if (tvx != null) { // At least one doc in this run had term vectors enabled fill(state.numDocs); + assert state.segmentName != null; + String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION); tvx.close(); tvf.close(); tvd.close(); - tvx = tvd = tvf = null; - assert state.segmentName != null; - String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION); - if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) { + tvx = null; + if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName)); - } lastDocID = 0; state.hasVectors = hasVectors; hasVectors = false; } - for (Map.Entry> entry : threadsAndFields.entrySet()) { - for (final TermsHashConsumerPerField field : entry.getValue() ) { - TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; - perField.termsHashPerField.reset(); - perField.shrinkHash(); - } - - TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey(); - perThread.termsHashPerThread.reset(true); + for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) { + TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; + perField.termsHashPerField.reset(); + perField.shrinkHash(); } } - int allocCount; - - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else { - return docFreeList[--freeCount]; - } - } - /** Fills in 
no-term-vectors for all docs we haven't seen * since the last doc that had term vectors. */ void fill(int docID) throws IOException { @@ -112,18 +87,17 @@ } } - synchronized void initTermVectorsWriter() throws IOException { + private final void initTermVectorsWriter() throws IOException { if (tvx == null) { // If we hit an exception while init'ing the term // vector output files, we must abort this segment // because those files will be in an unknown // state: - hasVectors = true; tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION)); tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION)); - + tvx.writeInt(TermVectorsReader.FORMAT_CURRENT); tvd.writeInt(TermVectorsReader.FORMAT_CURRENT); tvf.writeInt(TermVectorsReader.FORMAT_CURRENT); @@ -132,39 +106,44 @@ } } - synchronized void finishDocument(PerDoc perDoc) throws IOException { + @Override + void finishDocument(TermsHash termsHash) throws IOException { assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start"); + if (!hasVectors) { + return; + } + initTermVectorsWriter(); - fill(perDoc.docID); + fill(docState.docID); // Append term vectors to the real outputs: - tvx.writeLong(tvd.getFilePointer()); + long pointer = tvd.getFilePointer(); + tvx.writeLong(pointer); tvx.writeLong(tvf.getFilePointer()); - tvd.writeVInt(perDoc.numVectorFields); - if (perDoc.numVectorFields > 0) { - for(int i=0;i 0) { + for(int i=0;i threadBindings = new HashMap(); - - boolean bufferIsFull; // True when it's time to write segment - private boolean aborting; // True if an abort is pending - PrintStream infoStream; SimilarityProvider similarityProvider; - // max # simultaneous threads; if there are more than - // this, they wait for others to finish first - private final int maxThreadStates; + List newFiles; - // TODO: cutover to BytesRefHash - // Deletes for our still-in-RAM (to be flushed next) segment - private BufferedDeletes pendingDeletes = new BufferedDeletes(false); - - static class DocState { - DocumentsWriter docWriter; - Analyzer analyzer; - PrintStream infoStream; - SimilarityProvider similarityProvider; - int docID; - Document doc; - String maxTermPrefix; + final IndexWriter indexWriter; - // Only called by asserts - public boolean testPoint(String name) { - return docWriter.writer.testPoint(name); - } + private AtomicInteger numDocsInRAM = new AtomicInteger(0); - public void clear() { - // don't hold onto doc nor analyzer, in case it is - // largish: - doc = null; - analyzer = null; - } - } + // TODO: cut over to BytesRefHash in BufferedDeletes + volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue(); + private final Queue ticketQueue = new LinkedList(); - /** Consumer returns this on each doc. This holds any - * state that must be flushed synchronized "in docID - * order". We gather these and flush them in order. 
*/ - abstract static class DocWriter { - DocWriter next; - int docID; - abstract void finish() throws IOException; - abstract void abort(); - abstract long sizeInBytes(); + private Collection abortedFiles; // List of files that were written before last abort() - void setNext(DocWriter next) { - this.next = next; - } - } + final IndexingChain chain; - /** - * Create and return a new DocWriterBuffer. - */ - PerDocBuffer newPerDocBuffer() { - return new PerDocBuffer(); - } - - /** - * RAMFile buffer for DocWriters. - */ - class PerDocBuffer extends RAMFile { - - /** - * Allocate bytes used from shared pool. - */ - @Override - protected byte[] newBuffer(int size) { - assert size == PER_DOC_BLOCK_SIZE; - return perDocAllocator.getByteBlock(); + final DocumentsWriterPerThreadPool perThreadPool; + final FlushPolicy flushPolicy; + final DocumentsWriterFlushControl flushControl; + final Healthiness healthiness; + DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers, + BufferedDeletesStream bufferedDeletesStream) throws IOException { + this.directory = directory; + this.indexWriter = writer; + this.similarityProvider = config.getSimilarityProvider(); + this.perThreadPool = config.getIndexerThreadPool(); + this.chain = config.getIndexingChain(); + this.perThreadPool.initialize(this, globalFieldNumbers, config); + final FlushPolicy configuredPolicy = config.getFlushPolicy(); + if (configuredPolicy == null) { + flushPolicy = new FlushByRamOrCountsPolicy(); + } else { + flushPolicy = configuredPolicy; } + flushPolicy.init(this); - /** - * Recycle the bytes used. - */ - synchronized void recycle() { - if (buffers.size() > 0) { - setLength(0); - - // Recycle the blocks - perDocAllocator.recycleByteBlocks(buffers); - buffers.clear(); - sizeInBytes = 0; - - assert numBuffers() == 0; - } - } + healthiness = new Healthiness(); + final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024; + flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT); } - - /** - * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method - * which returns the DocConsumer that the DocumentsWriter calls to process the - * documents. 
- */ - abstract static class IndexingChain { - abstract DocConsumer getChain(DocumentsWriter documentsWriter); - } - - static final IndexingChain defaultIndexingChain = new IndexingChain() { - @Override - DocConsumer getChain(DocumentsWriter documentsWriter) { - /* - This is the current indexing chain: - - DocConsumer / DocConsumerPerThread - --> code: DocFieldProcessor / DocFieldProcessorPerThread - --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField - --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField - --> code: DocInverter / DocInverterPerThread / DocInverterPerField - --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: TermsHash / TermsHashPerThread / TermsHashPerField - --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField - --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField - --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField - --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField - --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField - */ - - // Build up indexing chain: - - final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter); - final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); - /* - * nesting TermsHash instances here to allow the secondary (TermVectors) share the interned postings - * via a shared ByteBlockPool. See TermsHashPerField for details. - */ - final TermsHash termVectorsTermHash = new TermsHash(documentsWriter, false, termVectorsWriter, null); - final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, termVectorsTermHash); - final NormsWriter normsWriter = new NormsWriter(); - final DocInverter docInverter = new DocInverter(termsHash, normsWriter); - return new DocFieldProcessor(documentsWriter, docInverter); + synchronized void deleteQueries(final Query... queries) throws IOException { + deleteQueue.addDelete(queries); + flushControl.doOnDelete(); + if (flushControl.doApplyAllDeletes()) { + applyAllDeletes(deleteQueue); } - }; - - final DocConsumer consumer; - - // How much RAM we can use before flushing. This is 0 if - // we are flushing by doc count instead. - - private final IndexWriterConfig config; - - private boolean closed; - private FieldInfos fieldInfos; - - private final BufferedDeletesStream bufferedDeletesStream; - private final IndexWriter.FlushControl flushControl; - - DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, IndexingChain indexingChain, FieldInfos fieldInfos, - BufferedDeletesStream bufferedDeletesStream) throws IOException { - this.directory = directory; - this.writer = writer; - this.similarityProvider = config.getSimilarityProvider(); - this.maxThreadStates = config.getMaxThreadStates(); - this.fieldInfos = fieldInfos; - this.bufferedDeletesStream = bufferedDeletesStream; - flushControl = writer.flushControl; - consumer = config.getIndexingChain().getChain(this); - this.config = config; } - // Buffer a specific docID for deletion. Currently only - // used when we hit a exception when adding a document - synchronized void deleteDocID(int docIDUpto) { - pendingDeletes.addDocID(docIDUpto); - // NOTE: we do not trigger flush here. 
This is - // potentially a RAM leak, if you have an app that tries - // to add docs but every single doc always hits a - // non-aborting exception. Allowing a flush here gets - // very messy because we are only invoked when handling - // exceptions so to do this properly, while handling an - // exception we'd have to go off and flush new deletes - // which is risky (likely would hit some other - // confounding exception). - } - - boolean deleteQueries(Query... queries) { - final boolean doFlush = flushControl.waitUpdate(0, queries.length); - synchronized(this) { - for (Query query : queries) { - pendingDeletes.addQuery(query, numDocs); - } + // TODO: we could check w/ FreqProxTermsWriter: if the + // term doesn't exist, don't bother buffering into the + // per-DWPT map (but still must go into the global map) + synchronized void deleteTerms(final Term... terms) throws IOException { + final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue; + deleteQueue.addDelete(terms); + flushControl.doOnDelete(); + if (flushControl.doApplyAllDeletes()) { + applyAllDeletes(deleteQueue); } - return doFlush; } - - boolean deleteQuery(Query query) { - final boolean doFlush = flushControl.waitUpdate(0, 1); - synchronized(this) { - pendingDeletes.addQuery(query, numDocs); - } - return doFlush; + + DocumentsWriterDeleteQueue currentDeleteSession() { + return deleteQueue; } - boolean deleteTerms(Term... terms) { - final boolean doFlush = flushControl.waitUpdate(0, terms.length); - synchronized(this) { - for (Term term : terms) { - pendingDeletes.addTerm(term, numDocs); + private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException { + if (deleteQueue != null) { + synchronized (ticketQueue) { + // Freeze and insert the delete flush ticket in the queue + ticketQueue.add(new FlushTicket(deleteQueue.freezeGlobalBuffer(null), false)); + applyFlushTickets(); } } - return doFlush; + indexWriter.applyAllDeletes(); + indexWriter.flushCount.incrementAndGet(); } - // TODO: we could check w/ FreqProxTermsWriter: if the - // term doesn't exist, don't bother buffering into the - // per-DWPT map (but still must go into the global map) - boolean deleteTerm(Term term, boolean skipWait) { - final boolean doFlush = flushControl.waitUpdate(0, 1, skipWait); - synchronized(this) { - pendingDeletes.addTerm(term, numDocs); - } - return doFlush; - } - - /** If non-null, various details of indexing are printed - * here. */ synchronized void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; - for(int i=0;i it = perThreadPool.getAllPerThreadsIterator(); + while (it.hasNext()) { + it.next().perThread.docState.infoStream = infoStream; } } - /** Get current segment name we are writing. */ - synchronized String getSegment() { - return segment; + /** Returns how many docs are currently buffered in RAM. */ + int getNumDocs() { + return numDocsInRAM.get(); } - /** Returns how many docs are currently buffered in RAM. 
*/ - synchronized int getNumDocs() { - return numDocs; + Collection abortedFiles() { + return abortedFiles; } - void message(String message) { + // returns boolean for asserts + boolean message(String message) { if (infoStream != null) { - writer.message("DW: " + message); + indexWriter.message("DW: " + message); } + return true; } - synchronized void setAborting() { - if (infoStream != null) { - message("setAborting"); + private void ensureOpen() throws AlreadyClosedException { + if (closed) { + throw new AlreadyClosedException("this IndexWriter is closed"); } - aborting = true; } /** Called if we hit an exception at a bad time (when @@ -378,816 +220,335 @@ * currently buffered docs. This resets our state, * discarding any docs added since last flush. */ synchronized void abort() throws IOException { - if (infoStream != null) { - message("docWriter: abort"); - } - boolean success = false; - try { + synchronized (this) { + deleteQueue.clear(); + } - // Forcefully remove waiting ThreadStates from line - waitQueue.abort(); - - // Wait for all other threads to finish with - // DocumentsWriter: - waitIdle(); - + try { if (infoStream != null) { - message("docWriter: abort waitIdle done"); + message("docWriter: abort"); } - assert 0 == waitQueue.numWaiting: "waitQueue.numWaiting=" + waitQueue.numWaiting; + final Iterator threadsIterator = perThreadPool.getActivePerThreadsIterator(); - waitQueue.waitingBytes = 0; - - pendingDeletes.clear(); - - for (DocumentsWriterThreadState threadState : threadStates) + while (threadsIterator.hasNext()) { + ThreadState perThread = threadsIterator.next(); + perThread.lock(); try { - threadState.consumer.abort(); - } catch (Throwable t) { + if (perThread.isActive()) { // we might be closed + perThread.perThread.abort(); + perThread.perThread.checkAndResetHasAborted(); + } else { + assert closed; + } + } finally { + perThread.unlock(); } - - try { - consumer.abort(); - } catch (Throwable t) { } - // Reset all postings data - doAfterFlush(); success = true; } finally { - aborting = false; - notifyAll(); if (infoStream != null) { - message("docWriter: done abort; success=" + success); + message("docWriter: done abort; abortedFiles=" + abortedFiles + " success=" + success); } } } - /** Reset after a flush */ - private void doAfterFlush() throws IOException { - // All ThreadStates should be idle when we are called - assert allThreadsIdle(); - for (DocumentsWriterThreadState threadState : threadStates) { - threadState.consumer.doAfterFlush(); - } - - threadBindings.clear(); - waitQueue.reset(); - segment = null; - fieldInfos = new FieldInfos(fieldInfos); - numDocs = 0; - nextDocID = 0; - bufferIsFull = false; - for(int i=0;i BD - final long delGen = bufferedDeletesStream.getNextGen(); - if (pendingDeletes.any()) { - if (segmentInfos.size() > 0 || newSegment != null) { - final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(pendingDeletes, delGen); - if (infoStream != null) { - message("flush: push buffered deletes startSize=" + pendingDeletes.bytesUsed.get() + " frozenSize=" + packet.bytesUsed); - } - bufferedDeletesStream.push(packet); - if (infoStream != null) { - message("flush: delGen=" + packet.gen); - } - if (newSegment != null) { - newSegment.setBufferedDeletesGen(packet.gen); - } - } else { - if (infoStream != null) { - message("flush: drop buffered deletes: no segments"); - } - // We can safely discard these deletes: since - // there are no segments, the deletions cannot - // affect anything. 
- } - pendingDeletes.clear(); - } else if (newSegment != null) { - newSegment.setBufferedDeletesGen(delGen); - } + void close() { + closed = true; + flushControl.setClosed(); } - public boolean anyDeletions() { - return pendingDeletes.any(); - } + boolean updateDocument(final Document doc, final Analyzer analyzer, + final Term delTerm) throws CorruptIndexException, IOException { + ensureOpen(); + boolean maybeMerge = false; + final boolean isUpdate = delTerm != null; + if (healthiness.anyStalledThreads()) { - /** Flush all pending docs to a new segment */ - // Lock order: IW -> DW - synchronized SegmentInfo flush(IndexWriter writer, IndexFileDeleter deleter, MergePolicy mergePolicy, SegmentInfos segmentInfos) throws IOException { - - final long startTime = System.currentTimeMillis(); - - // We change writer's segmentInfos: - assert Thread.holdsLock(writer); - - waitIdle(); - - if (numDocs == 0) { - // nothing to do! + // Help out flushing any pending DWPTs so we can un-stall: if (infoStream != null) { - message("flush: no docs; skipping"); + message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)"); } - // Lock order: IW -> DW -> BD - pushDeletes(null, segmentInfos); - return null; - } - if (aborting) { - if (infoStream != null) { - message("flush: skip because aborting is set"); - } - return null; - } - - boolean success = false; - - SegmentInfo newSegment; - - try { - assert nextDocID == numDocs; - assert waitQueue.numWaiting == 0; - assert waitQueue.waitingBytes == 0; - - if (infoStream != null) { - message("flush postings as segment " + segment + " numDocs=" + numDocs); - } - - final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos, - numDocs, writer.getConfig().getTermIndexInterval(), - fieldInfos.buildSegmentCodecs(true), - pendingDeletes); - // Apply delete-by-docID now (delete-byDocID only - // happens when an exception is hit processing that - // doc, eg if analyzer has some problem w/ the text): - if (pendingDeletes.docIDs.size() > 0) { - flushState.deletedDocs = new BitVector(numDocs); - for(int delDocID : pendingDeletes.docIDs) { - flushState.deletedDocs.set(delDocID); + // Try pick up pending threads here if possible + DocumentsWriterPerThread flushingDWPT; + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + // Don't push the delete here since the update could fail! + maybeMerge = doFlush(flushingDWPT); + if (!healthiness.anyStalledThreads()) { + break; } - pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID); - pendingDeletes.docIDs.clear(); } - newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos); - - Collection threads = new HashSet(); - for (DocumentsWriterThreadState threadState : threadStates) { - threads.add(threadState.consumer); + if (infoStream != null && healthiness.anyStalledThreads()) { + message("WARNING DocumentsWriter still has stalled threads; waiting"); } - double startMBUsed = bytesUsed()/1024./1024.; + healthiness.waitIfStalled(); // block if stalled - consumer.flush(threads, flushState); - - newSegment.setHasVectors(flushState.hasVectors); - - if (infoStream != null) { - message("new segment has " + (flushState.hasVectors ? 
"vectors" : "no vectors")); - if (flushState.deletedDocs != null) { - message("new segment has " + flushState.deletedDocs.count() + " deleted docs"); - } - message("flushedFiles=" + newSegment.files()); - message("flushed codecs=" + newSegment.getSegmentCodecs()); + if (infoStream != null && healthiness.anyStalledThreads()) { + message("WARNING DocumentsWriter done waiting"); } + } - if (mergePolicy.useCompoundFile(segmentInfos, newSegment)) { - final String cfsFileName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), + this, doc); + final DocumentsWriterPerThread flushingDWPT; + + try { - if (infoStream != null) { - message("flush: create compound file \"" + cfsFileName + "\""); - } - - CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, cfsFileName); - for(String fileName : newSegment.files()) { - cfsWriter.addFile(fileName); - } - cfsWriter.close(); - deleter.deleteNewFiles(newSegment.files()); - newSegment.setUseCompoundFile(true); + if (!perThread.isActive()) { + ensureOpen(); + assert false: "perThread is not active but we are still open"; } - - // Must write deleted docs after the CFS so we don't - // slurp the del file into CFS: - if (flushState.deletedDocs != null) { - final int delCount = flushState.deletedDocs.count(); - assert delCount > 0; - newSegment.setDelCount(delCount); - newSegment.advanceDelGen(); - final String delFileName = newSegment.getDelFileName(); - if (infoStream != null) { - message("flush: write " + delCount + " deletes to " + delFileName); + + final DocumentsWriterPerThread dwpt = perThread.perThread; + try { + dwpt.updateDocument(doc, analyzer, delTerm); + numDocsInRAM.incrementAndGet(); + } finally { + if (dwpt.checkAndResetHasAborted()) { + flushControl.doOnAbort(perThread); } - boolean success2 = false; - try { - // TODO: in the NRT case it'd be better to hand - // this del vector over to the - // shortly-to-be-opened SegmentReader and let it - // carry the changes; there's no reason to use - // filesystem as intermediary here. 
- flushState.deletedDocs.write(directory, delFileName); - success2 = true; - } finally { - if (!success2) { - try { - directory.deleteFile(delFileName); - } catch (Throwable t) { - // suppress this so we keep throwing the - // original exception - } - } - } } - - if (infoStream != null) { - message("flush: segment=" + newSegment); - final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.; - final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.; - message(" ramUsed=" + nf.format(startMBUsed) + " MB" + - " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + - " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" + - " docs/MB=" + nf.format(numDocs / newSegmentSize) + - " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%"); - } - - success = true; + flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); } finally { - notifyAll(); - if (!success) { - if (segment != null) { - deleter.refresh(segment); - } - abort(); - } + perThread.unlock(); } - - doAfterFlush(); - - // Lock order: IW -> DW -> BD - pushDeletes(newSegment, segmentInfos); - if (infoStream != null) { - message("flush time " + (System.currentTimeMillis()-startTime) + " msec"); - } - - return newSegment; - } - - synchronized void close() { - closed = true; - notifyAll(); - } - - /** Returns a free (idle) ThreadState that may be used for - * indexing this one document. This call also pauses if a - * flush is pending. If delTerm is non-null then we - * buffer this deleted term after the thread state has - * been acquired. */ - synchronized DocumentsWriterThreadState getThreadState(Document doc, Term delTerm) throws IOException { - - final Thread currentThread = Thread.currentThread(); - assert !Thread.holdsLock(writer); - - // First, find a thread state. If this thread already - // has affinity to a specific ThreadState, use that one - // again. - DocumentsWriterThreadState state = threadBindings.get(currentThread); - if (state == null) { - - // First time this thread has called us since last - // flush. 
Find the least loaded thread state: - DocumentsWriterThreadState minThreadState = null; - for(int i=0;i= maxThreadStates)) { - state = minThreadState; - state.numThreads++; - } else { - // Just create a new "private" thread state - DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1+threadStates.length]; - if (threadStates.length > 0) { - System.arraycopy(threadStates, 0, newArray, 0, threadStates.length); - } - state = newArray[threadStates.length] = new DocumentsWriterThreadState(this); - threadStates = newArray; - } - threadBindings.put(currentThread, state); } - - // Next, wait until my thread state is idle (in case - // it's shared with other threads), and no flush/abort - // pending - waitReady(state); - - // Allocate segment name if this is the first doc since - // last flush: - if (segment == null) { - segment = writer.newSegmentName(); - assert numDocs == 0; - } - - state.docState.docID = nextDocID++; - - if (delTerm != null) { - pendingDeletes.addTerm(delTerm, state.docState.docID); - } - - numDocs++; - state.isIdle = false; - return state; + return maybeMerge; } - - boolean addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { - return updateDocument(doc, analyzer, null); - } - - boolean updateDocument(Document doc, Analyzer analyzer, Term delTerm) - throws CorruptIndexException, IOException { - // Possibly trigger a flush, or wait until any running flush completes: - boolean doFlush = flushControl.waitUpdate(1, delTerm != null ? 1 : 0); - - // This call is synchronized but fast - final DocumentsWriterThreadState state = getThreadState(doc, delTerm); - - final DocState docState = state.docState; - docState.doc = doc; - docState.analyzer = analyzer; - - boolean success = false; - try { - // This call is not synchronized and does all the - // work - final DocWriter perDoc; + private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException { + boolean maybeMerge = false; + while (flushingDWPT != null) { + maybeMerge = true; + boolean success = false; + FlushTicket ticket = null; + try { - perDoc = state.consumer.processDocument(fieldInfos); - } finally { - docState.clear(); - } - - // This call is synchronized but fast - finishDocument(state, perDoc); - - success = true; - } finally { - if (!success) { - - // If this thread state had decided to flush, we - // must clear it so another thread can flush - if (doFlush) { - flushControl.clearFlushPending(); - } - - if (infoStream != null) { - message("exception in updateDocument aborting=" + aborting); - } - - synchronized(this) { - - state.isIdle = true; - notifyAll(); - - if (aborting) { - abort(); - } else { - skipDocWriter.docID = docState.docID; - boolean success2 = false; - try { - waitQueue.add(skipDocWriter); - success2 = true; - } finally { - if (!success2) { - abort(); - return false; - } + assert currentFullFlushDelQueue == null + || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: " + + currentFullFlushDelQueue + "but was: " + flushingDWPT.deleteQueue + + " " + flushControl.isFullFlush(); + /* + * Since with DWPT the flush process is concurrent and several DWPT + * could flush at the same time we must maintain the order of the + * flushes before we can apply the flushed segment and the frozen global + * deletes it is buffering. The reason for this is that the global + * deletes mark a certain point in time where we took a DWPT out of + * rotation and freeze the global deletes. 
+ * + * Example: A flush 'A' starts and freezes the global deletes, then + * flush 'B' starts and freezes all deletes occurred since 'A' has + * started. if 'B' finishes before 'A' we need to wait until 'A' is done + * otherwise the deletes frozen by 'B' are not applied to 'A' and we + * might miss to deletes documents in 'A'. + */ + try { + synchronized (ticketQueue) { + // Each flush is assigned a ticket in the order they accquire the ticketQueue lock + ticket = new FlushTicket(flushingDWPT.prepareFlush(), true); + ticketQueue.add(ticket); + } + + // flush concurrently without locking + final FlushedSegment newSegment = flushingDWPT.flush(); + synchronized (ticketQueue) { + ticket.segment = newSegment; + } + // flush was successful once we reached this point - new seg. has been assigned to the ticket! + success = true; + } finally { + if (!success && ticket != null) { + synchronized (ticketQueue) { + // In the case of a failure make sure we are making progress and + // apply all the deletes since the segment flush failed since the flush + // ticket could hold global deletes see FlushTicket#canPublish() + ticket.isSegmentFlush = false; } - - // Immediately mark this document as deleted - // since likely it was partially added. This - // keeps indexing as "all or none" (atomic) when - // adding a document: - deleteDocID(state.docState.docID); } } + /* + * Now we are done and try to flush the ticket queue if the head of the + * queue has already finished the flush. + */ + applyFlushTickets(); + } finally { + flushControl.doAfterFlush(flushingDWPT); + flushingDWPT.checkAndResetHasAborted(); + indexWriter.flushCount.incrementAndGet(); } + + flushingDWPT = flushControl.nextPendingFlush(); } - - doFlush |= flushControl.flushByRAMUsage("new document"); - - return doFlush; + return maybeMerge; } - public synchronized void waitIdle() { - while (!allThreadsIdle()) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + private void applyFlushTickets() throws IOException { + synchronized (ticketQueue) { + while (true) { + // Keep publishing eligible flushed segments: + final FlushTicket head = ticketQueue.peek(); + if (head != null && head.canPublish()) { + ticketQueue.poll(); + finishFlush(head.segment, head.frozenDeletes); + } else { + break; + } } } } - synchronized void waitReady(DocumentsWriterThreadState state) { - while (!closed && (!state.isIdle || aborting)) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + private void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes) + throws IOException { + // Finish the flushed segment and publish it to IndexWriter + if (newSegment == null) { + assert bufferedDeletes != null; + if (bufferedDeletes != null && bufferedDeletes.any()) { + indexWriter.bufferedDeletesStream.push(bufferedDeletes); + if (infoStream != null) { + message("flush: push buffered deletes: " + bufferedDeletes); + } } + } else { + publishFlushedSegment(newSegment, bufferedDeletes); } - - if (closed) { - throw new AlreadyClosedException("this IndexWriter is closed"); - } } - /** Does the synchronized work to finish/flush the - * inverted document. 
*/ - private void finishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter) throws IOException { - - // Must call this w/o holding synchronized(this) else - // we'll hit deadlock: - balanceRAM(); - - synchronized(this) { - - assert docWriter == null || docWriter.docID == perThread.docState.docID; - - if (aborting) { - - // We are currently aborting, and another thread is - // waiting for me to become idle. We just forcefully - // idle this threadState; it will be fully reset by - // abort() - if (docWriter != null) { - try { - docWriter.abort(); - } catch (Throwable t) { - } - } - - perThread.isIdle = true; - - // wakes up any threads waiting on the wait queue - notifyAll(); - - return; - } - - final boolean doPause; - - if (docWriter != null) { - doPause = waitQueue.add(docWriter); - } else { - skipDocWriter.docID = perThread.docState.docID; - doPause = waitQueue.add(skipDocWriter); - } - - if (doPause) { - waitForWaitQueue(); - } - - perThread.isIdle = true; - - // wakes up any threads waiting on the wait queue - notifyAll(); + final void subtractFlushedNumDocs(int numFlushed) { + int oldValue = numDocsInRAM.get(); + while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) { + oldValue = numDocsInRAM.get(); } } - - synchronized void waitForWaitQueue() { - do { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + + /** + * Publishes the flushed segment, segment private deletes (if any) and its + * associated global delete (if present) to IndexWriter. The actual + * publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s + * delete generation is always GlobalPacket_deleteGeneration + 1 + */ + private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket) + throws IOException { + assert newSegment != null; + final SegmentInfo segInfo = indexWriter.prepareFlushedSegment(newSegment); + final BufferedDeletes deletes = newSegment.segmentDeletes; + FrozenBufferedDeletes packet = null; + if (deletes != null && deletes.any()) { + // Segment private delete + packet = new FrozenBufferedDeletes(deletes, true); + if (infoStream != null) { + message("flush: push buffered seg private deletes: " + packet); } - } while (!waitQueue.doResume()); - } - - private static class SkipDocWriter extends DocWriter { - @Override - void finish() { } - @Override - void abort() { - } - @Override - long sizeInBytes() { - return 0; - } - } - final SkipDocWriter skipDocWriter = new SkipDocWriter(); - NumberFormat nf = NumberFormat.getInstance(); - - /* Initial chunks size of the shared byte[] blocks used to - store postings data */ - final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; - - /* if you increase this, you must fix field cache impl for - * getTerms/getTermsIndex requires <= 32768. */ - final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; - - /* Initial chunks size of the shared int[] blocks used to - store postings data */ - final static int INT_BLOCK_SHIFT = 13; - final static int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT; - final static int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1; - - private List freeIntBlocks = new ArrayList(); - - /* Allocate another int[] from the shared pool */ - synchronized int[] getIntBlock() { - final int size = freeIntBlocks.size(); - final int[] b; - if (0 == size) { - b = new int[INT_BLOCK_SIZE]; - bytesUsed.addAndGet(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT); - } else { - b = freeIntBlocks.remove(size-1); - } - return b; + // now publish! 
+ indexWriter.publishFlushedSegment(segInfo, packet, globalPacket); } - - long bytesUsed() { - return bytesUsed.get() + pendingDeletes.bytesUsed.get(); + + // for asserts + private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null; + // for asserts + private synchronized boolean setFlushingDeleteQueue(DocumentsWriterDeleteQueue session) { + currentFullFlushDelQueue = session; + return true; } + + /* + * FlushAllThreads is synced by IW fullFlushLock. Flushing all threads is a + * two stage operation; the caller must ensure (in try/finally) that finishFlush + * is called after this method, to release the flush lock in DWFlushControl + */ + final boolean flushAllThreads(final boolean flushDeletes) + throws IOException { + final DocumentsWriterDeleteQueue flushingDeleteQueue; - /* Return int[]s to the pool */ - synchronized void recycleIntBlocks(int[][] blocks, int start, int end) { - for(int i=start;i= ramBufferSize; - } - - if (doBalance) { - - if (infoStream != null) { - message(" RAM: balance allocations: usedMB=" + toMB(bytesUsed()) + - " vs trigger=" + toMB(ramBufferSize) + - " deletesMB=" + toMB(deletesRAMUsed) + - " byteBlockFree=" + toMB(byteBlockAllocator.bytesUsed()) + - " perDocFree=" + toMB(perDocAllocator.bytesUsed())); + boolean anythingFlushed = false; + try { + DocumentsWriterPerThread flushingDWPT; + // Help out with flushing: + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + anythingFlushed |= doFlush(flushingDWPT); } - - final long startBytesUsed = bytesUsed() + deletesRAMUsed; - - int iter = 0; - - // We free equally from each pool in 32 KB - // chunks until we are below our threshold - // (freeLevel) - - boolean any = true; - - final long freeLevel = (long) (0.95 * ramBufferSize); - - while(bytesUsed()+deletesRAMUsed > freeLevel) { - - synchronized(this) { - if (0 == perDocAllocator.numBufferedBlocks() && - 0 == byteBlockAllocator.numBufferedBlocks() && - 0 == freeIntBlocks.size() && !any) { - // Nothing else to free -- must flush now. - bufferIsFull = bytesUsed()+deletesRAMUsed > ramBufferSize; - if (infoStream != null) { - if (bytesUsed()+deletesRAMUsed > ramBufferSize) { - message(" nothing to free; set bufferIsFull"); - } else { - message(" nothing to free"); - } - } - break; - } - - if ((0 == iter % 4) && byteBlockAllocator.numBufferedBlocks() > 0) { - byteBlockAllocator.freeBlocks(1); - } - if ((1 == iter % 4) && freeIntBlocks.size() > 0) { - freeIntBlocks.remove(freeIntBlocks.size()-1); - bytesUsed.addAndGet(-INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT); - } - if ((2 == iter % 4) && perDocAllocator.numBufferedBlocks() > 0) { - perDocAllocator.freeBlocks(32); // Remove upwards of 32 blocks (each block is 1K) - } + // If a concurrent flush is still in flight wait for it + while (flushControl.anyFlushing()) { + flushControl.waitForFlush(); + } + if (!anythingFlushed && flushDeletes) { + synchronized (ticketQueue) { + ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false)); } - - if ((3 == iter % 4) && any) { - // Ask consumer to free any recycled state - any = consumer.freeRAM(); - } - - iter++; + applyFlushTickets(); } - - if (infoStream != null) { - message(" after free: freedMB=" + nf.format((startBytesUsed-bytesUsed()-deletesRAMUsed)/1024./1024.) 
+ " usedMB=" + nf.format((bytesUsed()+deletesRAMUsed)/1024./1024.)); - } + } finally { + assert flushingDeleteQueue == currentFullFlushDelQueue; } + return anythingFlushed; } - - final WaitQueue waitQueue = new WaitQueue(); - - private class WaitQueue { - DocWriter[] waiting; - int nextWriteDocID; - int nextWriteLoc; - int numWaiting; - long waitingBytes; - - public WaitQueue() { - waiting = new DocWriter[10]; + + final void finishFullFlush(boolean success) { + assert setFlushingDeleteQueue(null); + if (success) { + // Release the flush lock + flushControl.finishFullFlush(); + } else { + flushControl.abortFullFlushes(); } + } - synchronized void reset() { - // NOTE: nextWriteLoc doesn't need to be reset - assert numWaiting == 0; - assert waitingBytes == 0; - nextWriteDocID = 0; + static final class FlushTicket { + final FrozenBufferedDeletes frozenDeletes; + /* access to non-final members must be synchronized on DW#ticketQueue */ + FlushedSegment segment; + boolean isSegmentFlush; + + FlushTicket(FrozenBufferedDeletes frozenDeletes, boolean isSegmentFlush) { + this.frozenDeletes = frozenDeletes; + this.isSegmentFlush = isSegmentFlush; } - - synchronized boolean doResume() { - final double mb = config.getRAMBufferSizeMB(); - final long waitQueueResumeBytes; - if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) { - waitQueueResumeBytes = 2*1024*1024; - } else { - waitQueueResumeBytes = (long) (mb*1024*1024*0.05); - } - return waitingBytes <= waitQueueResumeBytes; + + boolean canPublish() { + return (!isSegmentFlush || segment != null); } - - synchronized boolean doPause() { - final double mb = config.getRAMBufferSizeMB(); - final long waitQueuePauseBytes; - if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) { - waitQueuePauseBytes = 4*1024*1024; - } else { - waitQueuePauseBytes = (long) (mb*1024*1024*0.1); - } - return waitingBytes > waitQueuePauseBytes; - } - - synchronized void abort() { - int count = 0; - for(int i=0;i= nextWriteDocID; - - if (doc.docID == nextWriteDocID) { - writeDocument(doc); - while(true) { - doc = waiting[nextWriteLoc]; - if (doc != null) { - numWaiting--; - waiting[nextWriteLoc] = null; - waitingBytes -= doc.sizeInBytes(); - writeDocument(doc); - } else { - break; - } - } - } else { - - // I finished before documents that were added - // before me. This can easily happen when I am a - // small doc and the docs before me were large, or, - // just due to luck in the thread scheduling. Just - // add myself to the queue and when that large doc - // finishes, it will flush me: - int gap = doc.docID - nextWriteDocID; - if (gap >= waiting.length) { - // Grow queue - DocWriter[] newArray = new DocWriter[ArrayUtil.oversize(gap, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - assert nextWriteLoc >= 0; - System.arraycopy(waiting, nextWriteLoc, newArray, 0, waiting.length-nextWriteLoc); - System.arraycopy(waiting, 0, newArray, waiting.length-nextWriteLoc, nextWriteLoc); - nextWriteLoc = 0; - waiting = newArray; - gap = doc.docID - nextWriteDocID; - } - - int loc = nextWriteLoc + gap; - if (loc >= waiting.length) { - loc -= waiting.length; - } - - // We should only wrap one time - assert loc < waiting.length; - - // Nobody should be in my spot! 
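As a usage note, the two-stage contract described for flushAllThreads (run it while holding the IndexWriter-side full-flush lock, then always call finishFullFlush(success) in a finally block) is the pattern the callers in IndexWriter follow. A reduced, hypothetical caller-side sketch, not the literal IndexWriter code:

    import java.io.IOException;

    // Hypothetical helper showing the two-stage full flush contract.
    final class FullFlushCallerSketch {

      interface FlushingDocWriter {
        boolean flushAllThreads(boolean applyAllDeletes) throws IOException;
        void finishFullFlush(boolean success);
      }

      private final Object fullFlushLock = new Object();

      boolean fullFlush(FlushingDocWriter docWriter, boolean applyAllDeletes) throws IOException {
        boolean anythingFlushed = false;
        synchronized (fullFlushLock) {   // only one full flush may run at a time
          boolean success = false;
          try {
            anythingFlushed = docWriter.flushAllThreads(applyAllDeletes);
            success = true;
          } finally {
            // Stage two: always release the full-flush state; on failure this
            // aborts the pending flushes instead of publishing them.
            docWriter.finishFullFlush(success);
          }
        }
        return anythingFlushed;
      }
    }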
- assert waiting[loc] == null; - waiting[loc] = doc; - numWaiting++; - waitingBytes += doc.sizeInBytes(); - } - - return doPause(); - } } } Index: lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (working copy) @@ -28,11 +28,10 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField { - final TermVectorsTermsWriterPerThread perThread; final TermsHashPerField termsHashPerField; final TermVectorsTermsWriter termsWriter; final FieldInfo fieldInfo; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; boolean doVectors; @@ -41,11 +40,10 @@ int maxNumPostings; OffsetAttribute offsetAttribute = null; - - public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo) { + + public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriter termsWriter, FieldInfo fieldInfo) { this.termsHashPerField = termsHashPerField; - this.perThread = perThread; - this.termsWriter = perThread.termsWriter; + this.termsWriter = termsWriter; this.fieldInfo = fieldInfo; docState = termsHashPerField.docState; fieldState = termsHashPerField.fieldState; @@ -72,65 +70,55 @@ } if (doVectors) { - if (perThread.doc == null) { - perThread.doc = termsWriter.getPerDoc(); - perThread.doc.docID = docState.docID; - assert perThread.doc.numVectorFields == 0; - assert 0 == perThread.doc.perDocTvf.length(); - assert 0 == perThread.doc.perDocTvf.getFilePointer(); + termsWriter.hasVectors = true; + if (termsHashPerField.bytesHash.size() != 0) { + // Only necessary if previous doc hit a + // non-aborting exception while writing vectors in + // this field: + termsHashPerField.reset(); + } } - assert perThread.doc.docID == docState.docID; - - if (termsHashPerField.bytesHash.size() != 0) { - // Only necessary if previous doc hit a - // non-aborting exception while writing vectors in - // this field: - termsHashPerField.reset(); - perThread.termsHashPerThread.reset(false); - } - } - // TODO: only if needed for performance //perThread.postingsCount = 0; return doVectors; - } + } public void abort() {} /** Called once per field per document if term vectors * are enabled, to write the vectors to * RAMOutputStream, which is then quickly flushed to - * the real term vectors files in the Directory. */ - @Override + * the real term vectors files in the Directory. */ @Override void finish() throws IOException { + if (!doVectors || termsHashPerField.bytesHash.size() == 0) + return; + termsWriter.addFieldToFlush(this); + } + + void finishDocument() throws IOException { assert docState.testPoint("TermVectorsTermsWriterPerField.finish start"); final int numPostings = termsHashPerField.bytesHash.size(); - final BytesRef flushTerm = perThread.flushTerm; + final BytesRef flushTerm = termsWriter.flushTerm; assert numPostings >= 0; - if (!doVectors || numPostings == 0) - return; - if (numPostings > maxNumPostings) maxNumPostings = numPostings; - final IndexOutput tvf = perThread.doc.perDocTvf; - // This is called once, after inverting all occurrences // of a given field in the doc. At this point we flush // our hash into the DocWriter. 
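For context, this per-field writer only sees fields that were indexed with term vectors enabled. A minimal sketch of such a field, assuming the 3.x-style Field constructor that takes a Field.TermVector option:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;

    // Sketch: a document whose "body" field stores term vectors with positions
    // and offsets, which is what drives the position/offset bits written to tvf.
    final class TermVectorFieldSketch {
      static Document docWithTermVectors(String text) {
        Document doc = new Document();
        doc.add(new Field("body", text, Field.Store.NO, Field.Index.ANALYZED,
                          Field.TermVector.WITH_POSITIONS_OFFSETS));
        return doc;
      }
    }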
assert fieldInfo.storeTermVector; - assert perThread.vectorFieldsInOrder(fieldInfo); + assert termsWriter.vectorFieldsInOrder(fieldInfo); - perThread.doc.addField(termsHashPerField.fieldInfo.number); TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray; + final IndexOutput tvf = termsWriter.tvf; // TODO: we may want to make this sort in same order // as Codec's terms dict? @@ -140,21 +128,21 @@ byte bits = 0x0; if (doVectorPositions) bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR; - if (doVectorOffsets) + if (doVectorOffsets) bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR; tvf.writeByte(bits); int lastLen = 0; byte[] lastBytes = null; int lastStart = 0; - - final ByteSliceReader reader = perThread.vectorSliceReader; - final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool; + final ByteSliceReader reader = termsWriter.vectorSliceReader; + final ByteBlockPool termBytePool = termsHashPerField.termBytePool; + for(int j=0;jIndexWriter creates and maintains an index. -

The create argument to the {@link - #IndexWriter(Directory, IndexWriterConfig) constructor} determines +

The {@link OpenMode} option on + {@link IndexWriterConfig#setOpenMode(OpenMode)} determines whether a new index is created, or whether an existing index is - opened. Note that you can open an index with create=true - even while readers are using the index. The old readers will + opened. Note that you can open an index with {@link OpenMode#CREATE} + even while readers are using the index. The old readers will continue to search the "point in time" snapshot they had opened, - and won't see the newly created index until they re-open. There are - also {@link #IndexWriter(Directory, IndexWriterConfig) constructors} - with no create argument which will create a new index - if there is not already an index at the provided path and otherwise - open the existing index.

+ and won't see the newly created index until they re-open. If + {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a + new index if there is not already an index at the provided path + and otherwise open the existing index.
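A minimal sketch of the OpenMode selection described here; the Version constant, analyzer choice and index path are placeholders to adjust to your release:

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    final class OpenModeSketch {
      static IndexWriter openWriter(File path) throws IOException {
        Directory dir = FSDirectory.open(path);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
            new StandardAnalyzer(Version.LUCENE_40));
        // CREATE replaces any existing index; CREATE_OR_APPEND appends if one exists.
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        return new IndexWriter(dir, conf);
      }
    }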

In either case, documents are added with {@link #addDocument(Document) addDocument} and removed with {@link #deleteDocuments(Term)} or {@link @@ -76,15 +77,19 @@

These changes are buffered in memory and periodically flushed to the {@link Directory} (during the above method - calls). A flush is triggered when there are enough - buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) - or enough added documents since the last flush, whichever - is sooner. For the added documents, flushing is triggered - either by RAM usage of the documents (see {@link - IndexWriterConfig#setRAMBufferSizeMB}) or the number of added documents. - The default is to flush when RAM usage hits 16 MB. For + calls). A flush is triggered when there are enough added documents + since the last flush. Flushing is triggered either by RAM usage of the + documents (see {@link IndexWriterConfig#setRAMBufferSizeMB}) or the + number of added documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}). + The default is to flush when RAM usage hits + {@value IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For best indexing speed you should flush by RAM usage with a - large RAM buffer. Note that flushing just moves the + large RAM buffer. Additionally, if IndexWriter reaches the configured number of + buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) + the deleted terms and queries are flushed and applied to existing segments. + In contrast to the other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and + {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms + won't trigger a segment flush. Note that flushing just moves the internal buffered state in IndexWriter into the index, but these changes are not visible to IndexReader until either {@link #commit()} or {@link #close} is called. A flush may @@ -165,21 +170,21 @@ /* * Clarification: Check Points (and commits) * IndexWriter writes new index files to the directory without writing a new segments_N - * file which references these new files. It also means that the state of + * file which references these new files. It also means that the state of * the in memory SegmentInfos object is different than the most recent * segments_N file written to the directory. - * - * Each time the SegmentInfos is changed, and matches the (possibly - * modified) directory files, we have a new "check point". - * If the modified/new SegmentInfos is written to disk - as a new - * (generation of) segments_N file - this check point is also an + * + * Each time the SegmentInfos is changed, and matches the (possibly + * modified) directory files, we have a new "check point". + * If the modified/new SegmentInfos is written to disk - as a new + * (generation of) segments_N file - this check point is also an * IndexCommit. - * - * A new checkpoint always replaces the previous checkpoint and - * becomes the new "front" of the index. This allows the IndexFileDeleter + * + * A new checkpoint always replaces the previous checkpoint and + * becomes the new "front" of the index. This allows the IndexFileDeleter * to delete files that are referenced only by stale checkpoints. * (files that were created since the last commit, but are no longer - * referenced by the "front" of the index). For this, IndexFileDeleter + * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint. */ public class IndexWriter implements Closeable { @@ -195,7 +200,7 @@ * printed to infoStream, if set (see {@link * #setInfoStream}). 
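Returning to the flush triggers discussed above, they map onto IndexWriterConfig roughly as follows; the concrete values are arbitrary examples, continuing the hypothetical conf from the earlier sketch:

    // Flush segments by RAM usage (preferred for indexing speed), leave the
    // doc-count trigger disabled, and apply buffered deletes after 1000 terms.
    // Note: the delete-terms limit applies deletes but does not flush a segment.
    conf.setRAMBufferSizeMB(64.0);
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    conf.setMaxBufferedDeleteTerms(1000);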
*/ - public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH_UTF8; + public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8; // The normal read buffer size defaults to 1024, but // increasing this during merging seems to yield @@ -225,7 +230,7 @@ final FieldNumberBiMap globalFieldNumberMap; private DocumentsWriter docWriter; - private IndexFileDeleter deleter; + final IndexFileDeleter deleter; private Set segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization private int optimizeMaxNumSegments; @@ -247,12 +252,12 @@ private long mergeGen; private boolean stopMerges; - private final AtomicInteger flushCount = new AtomicInteger(); - private final AtomicInteger flushDeletesCount = new AtomicInteger(); + final AtomicInteger flushCount = new AtomicInteger(); + final AtomicInteger flushDeletesCount = new AtomicInteger(); final ReaderPool readerPool = new ReaderPool(); final BufferedDeletesStream bufferedDeletesStream; - + // This is a "write once" variable (like the organic dye // on a DVD-R that may or may not be heated by a laser and // then cooled to permanently record the event): it's @@ -339,31 +344,56 @@ */ IndexReader getReader(boolean applyAllDeletes) throws IOException { ensureOpen(); - + final long tStart = System.currentTimeMillis(); if (infoStream != null) { message("flush at getReader"); } - // Do this up front before flushing so that the readers // obtained during this flush are pooled, the first time // this method is called: poolReaders = true; - - // Prevent segmentInfos from changing while opening the - // reader; in theory we could do similar retry logic, - // just like we do when loading segments_N - IndexReader r; - synchronized(this) { - flush(false, applyAllDeletes); - r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); - if (infoStream != null) { - message("return reader version=" + r.getVersion() + " reader=" + r); + final IndexReader r; + doBeforeFlush(); + final boolean maybeMerge; + /* + * for releasing a NRT reader we must ensure that + * DW doesn't add any segments or deletes until we are + * done with creating the NRT DirectoryReader. + * We release the two stage full flush after we are done opening the + * directory reader! + */ + synchronized (fullFlushLock) { + boolean success = false; + try { + maybeMerge = docWriter.flushAllThreads(applyAllDeletes); + if (!maybeMerge) { + flushCount.incrementAndGet(); + } + success = true; + // Prevent segmentInfos from changing while opening the + // reader; in theory we could do similar retry logic, + // just like we do when loading segments_N + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); + if (infoStream != null) { + message("return reader version=" + r.getVersion() + " reader=" + r); + } + } + } finally { + if (!success && infoStream != null) { + message("hit exception during while NRT reader"); + } + // Done: finish the full flush! 
+ docWriter.finishFullFlush(success); + doAfterFlush(); } } - maybeMerge(); - + if(maybeMerge) { + maybeMerge(); + } if (infoStream != null) { message("getReader took " + (System.currentTimeMillis() - tStart) + " msec"); } @@ -400,10 +430,10 @@ if (r != null) { r.hasChanges = false; } - } + } } } - + // used only by asserts public synchronized boolean infoIsLive(SegmentInfo info) { int idx = segmentInfos.indexOf(info); @@ -419,7 +449,7 @@ } return info; } - + /** * Release the segment reader (i.e. decRef it and close if there * are no more references. @@ -432,7 +462,7 @@ public synchronized boolean release(SegmentReader sr) throws IOException { return release(sr, false); } - + /** * Release the segment reader (i.e. decRef it and close if there * are no more references. @@ -493,7 +523,7 @@ sr.close(); } } - + /** Remove all our references to readers, and commits * any pending changes. */ synchronized void close() throws IOException { @@ -503,7 +533,7 @@ Iterator> iter = readerMap.entrySet().iterator(); while (iter.hasNext()) { - + Map.Entry ent = iter.next(); SegmentReader sr = ent.getValue(); @@ -526,7 +556,7 @@ sr.decRef(); } } - + /** * Commit all segment reader in the pool. * @throws IOException @@ -550,7 +580,7 @@ } } } - + /** * Returns a ref to a clone. NOTE: this clone is not * enrolled in the pool, so you should simply close() @@ -564,7 +594,7 @@ sr.decRef(); } } - + /** * Obtain a SegmentReader from the readerPool. The reader * must be returned by calling {@link #release(SegmentReader)} @@ -580,7 +610,7 @@ /** * Obtain a SegmentReader from the readerPool. The reader * must be returned by calling {@link #release(SegmentReader)} - * + * * @see #release(SegmentReader) * @param info * @param doOpenStores @@ -638,7 +668,7 @@ return sr; } } - + /** * Obtain the number of deleted docs for a pooled reader. * If the reader isn't being pooled, the segmentInfo's @@ -658,7 +688,7 @@ } } } - + /** * Used internally to throw an {@link * AlreadyClosedException} if this IndexWriter has been @@ -721,7 +751,7 @@ mergePolicy.setIndexWriter(this); mergeScheduler = conf.getMergeScheduler(); codecs = conf.getCodecProvider(); - + bufferedDeletesStream = new BufferedDeletesStream(messageID); bufferedDeletesStream.setInfoStream(infoStream); poolReaders = conf.getReaderPooling(); @@ -790,8 +820,7 @@ // start with previous field numbers, but new FieldInfos globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory); - docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(), - globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)), bufferedDeletesStream); + docWriter = new DocumentsWriter(config, directory, this, globalFieldNumberMap, bufferedDeletesStream); docWriter.setInfoStream(infoStream); // Default deleter (for backwards compatibility) is @@ -849,7 +878,7 @@ public IndexWriterConfig getConfig() { return config; } - + /** If non-null, this will be the default infoStream used * by a newly instantiated IndexWriter. * @see #setInfoStream @@ -901,7 +930,7 @@ public boolean verbose() { return infoStream != null; } - + /** * Commits all changes to an index and closes all * associated files. Note that this may be a costly @@ -916,7 +945,7 @@ * even though part of it (flushing buffered documents) * may have succeeded, so the write lock will still be * held.
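Stepping back to the getReader path rewritten above: from application code the near-real-time reader is obtained through the public API, roughly as below. This is a sketch; IndexReader.open(IndexWriter, boolean) is assumed to be the public entry point at this point in trunk, and reader refresh uses the reopen idiom.

    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;

    final class NrtReaderSketch {
      // Open a near-real-time reader over the writer's uncommitted state; this
      // drives the two-stage full flush above but does not commit anything.
      static IndexReader openNrt(IndexWriter writer) throws IOException {
        return IndexReader.open(writer, true /* applyAllDeletes */);
      }

      // Refresh idiom: reopen() returns the same instance when nothing changed.
      static IndexReader refresh(IndexReader reader) throws IOException {
        IndexReader newer = reader.reopen();
        if (newer != reader) {
          reader.close();
        }
        return newer;
      }
    }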

- * + * *

If you can correct the underlying cause (eg free up * some disk space) then you can call close() again. * Failing that, if you want to force the write lock to be @@ -1036,7 +1065,7 @@ if (infoStream != null) message("now call final commit()"); - + if (!hitOOM) { commitInternal(null); } @@ -1049,7 +1078,7 @@ docWriter = null; deleter.close(); } - + if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -1072,7 +1101,7 @@ } /** Returns the Directory used by this index. */ - public Directory getDirectory() { + public Directory getDirectory() { // Pass false because the flush during closing calls getDirectory ensureOpen(false); return directory; @@ -1196,22 +1225,7 @@ * @throws IOException if there is a low-level IO error */ public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { - ensureOpen(); - boolean doFlush = false; - boolean success = false; - try { - try { - doFlush = docWriter.updateDocument(doc, analyzer, null); - success = true; - } finally { - if (!success && infoStream != null) - message("hit exception adding document"); - } - if (doFlush) - flush(true, false); - } catch (OutOfMemoryError oom) { - handleOOM(oom, "addDocument"); - } + updateDocument(null, doc, analyzer); } /** @@ -1228,9 +1242,7 @@ public void deleteDocuments(Term term) throws CorruptIndexException, IOException { ensureOpen(); try { - if (docWriter.deleteTerm(term, false)) { - flush(true, false); - } + docWriter.deleteTerms(term); } catch (OutOfMemoryError oom) { handleOOM(oom, "deleteDocuments(Term)"); } @@ -1238,7 +1250,8 @@ /** * Deletes the document(s) containing any of the - * terms. All deletes are flushed at the same time. + * terms. All given deletes are applied and flushed atomically + * at the same time. * *

NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See (segmentInfos); optimizeMaxNumSegments = maxNumSegments; - + // Now mark all pending & running merges as optimize // merge: for(final MergePolicy.OneMerge merge : pendingMerges) { @@ -1612,12 +1620,12 @@ if (merge.optimize) return true; } - + for (final MergePolicy.OneMerge merge : runningMerges) { if (merge.optimize) return true; } - + return false; } @@ -1914,7 +1922,7 @@ /** * Delete all documents in the index. * - *

This method will drop all buffered documents and will + *

This method will drop all buffered documents and will * remove all segments from the index. This change will not be * visible until a {@link #commit()} has been called. This method * can be rolled back using {@link #rollback()}.

@@ -1944,7 +1952,7 @@ deleter.refresh(); // Don't bother saving any changes in our segmentInfos - readerPool.clear(null); + readerPool.clear(null); // Mark that the index has changed ++changeCount; @@ -1971,7 +1979,7 @@ mergeFinish(merge); } pendingMerges.clear(); - + for (final MergePolicy.OneMerge merge : runningMerges) { if (infoStream != null) message("now abort running merge " + merge.segString(directory)); @@ -1998,7 +2006,7 @@ message("all running merges have aborted"); } else { - // waitForMerges() will ensure any running addIndexes finishes. + // waitForMerges() will ensure any running addIndexes finishes. // It's fine if a new one attempts to start because from our // caller above the call will see that we are in the // process of closing, and will throw an @@ -2010,7 +2018,7 @@ /** * Wait for any currently outstanding merges to finish. * - *

It is guaranteed that any merges started prior to calling this method + *

It is guaranteed that any merges started prior to calling this method * will have completed once this method completes.

*/ public synchronized void waitForMerges() { @@ -2040,6 +2048,125 @@ deleter.checkpoint(segmentInfos, false); } + /** + * Prepares the {@link SegmentInfo} for the new flushed segment and persists + * the deleted documents {@link BitVector}. Use + * {@link #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes)} to + * publish the returned {@link SegmentInfo} together with its segment private + * delete packet. + * + * @see #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes) + */ + SegmentInfo prepareFlushedSegment(FlushedSegment flushedSegment) throws IOException { + assert flushedSegment != null; + + SegmentInfo newSegment = flushedSegment.segmentInfo; + + setDiagnostics(newSegment, "flush"); + + boolean success = false; + try { + if (useCompoundFile(newSegment)) { + String compoundFileName = IndexFileNames.segmentFileName(newSegment.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + message("creating compound file " + compoundFileName); + // Now build compound file + CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName); + for(String fileName : newSegment.files()) { + cfsWriter.addFile(fileName); + } + + // Perform the merge + cfsWriter.close(); + synchronized(this) { + deleter.deleteNewFiles(newSegment.files()); + } + + newSegment.setUseCompoundFile(true); + } + + // Must write deleted docs after the CFS so we don't + // slurp the del file into CFS: + if (flushedSegment.deletedDocuments != null) { + final int delCount = flushedSegment.deletedDocuments.count(); + assert delCount > 0; + newSegment.setDelCount(delCount); + newSegment.advanceDelGen(); + final String delFileName = newSegment.getDelFileName(); + if (infoStream != null) { + message("flush: write " + delCount + " deletes to " + delFileName); + } + boolean success2 = false; + try { + // TODO: in the NRT case it'd be better to hand + // this del vector over to the + // shortly-to-be-opened SegmentReader and let it + // carry the changes; there's no reason to use + // filesystem as intermediary here. + flushedSegment.deletedDocuments.write(directory, delFileName); + success2 = true; + } finally { + if (!success2) { + try { + directory.deleteFile(delFileName); + } catch (Throwable t) { + // suppress this so we keep throwing the + // original exception + } + } + } + } + + success = true; + } finally { + if (!success) { + if (infoStream != null) { + message("hit exception " + + "reating compound file for newly flushed segment " + newSegment.name); + } + + synchronized(this) { + deleter.refresh(newSegment.name); + } + } + } + return newSegment; + } + + /** + * Atomically adds the segment private delete packet and publishes the flushed + * segments SegmentInfo to the index writer. NOTE: use + * {@link #prepareFlushedSegment(FlushedSegment)} to obtain the + * {@link SegmentInfo} for the flushed segment. + * + * @see #prepareFlushedSegment(FlushedSegment) + */ + synchronized void publishFlushedSegment(SegmentInfo newSegment, + FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) throws IOException { + // Lock order IW -> BDS + synchronized (bufferedDeletesStream) { + if (globalPacket != null && globalPacket.any()) { + bufferedDeletesStream.push(globalPacket); + } + // Publishing the segment must be synched on IW -> BDS to make the sure + // that no merge prunes away the seg. 
private delete packet + final long nextGen; + if (packet != null && packet.any()) { + nextGen = bufferedDeletesStream.push(packet); + } else { + // Since we don't have a delete packet to apply we can get a new + // generation right away + nextGen = bufferedDeletesStream.getNextGen(); + } + newSegment.setBufferedDeletesGen(nextGen); + segmentInfos.add(newSegment); + checkpoint(); + } + } + + synchronized boolean useCompoundFile(SegmentInfo segmentInfo) throws IOException { + return mergePolicy.useCompoundFile(segmentInfos, segmentInfo); + } + private synchronized void resetMergeExceptions() { mergeExceptions = new ArrayList(); mergeGen++; @@ -2088,11 +2215,11 @@ *

* NOTE: this method only copies the segments of the incoming indexes * and does not merge them. Therefore deleted documents are not removed and - * the new segments are not merged with the existing ones. Also, the segments - * are copied as-is, meaning they are not converted to CFS if they aren't, - * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} + * the new segments are not merged with the existing ones. Also, the segments + * are copied as-is, meaning they are not converted to CFS if they aren't, + * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} * or {@link #optimize} afterwards. - * + * *

This requires this index not be among those to be added. * *
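A usage sketch of the copy-only behavior described above; the source directories are hypothetical, and the optional maybeMerge call lets the merge policy fold the copied segments in afterwards:

    import java.io.IOException;

    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    final class AddIndexesSketch {
      static void copyIndexes(IndexWriter writer, Directory... sources) throws IOException {
        // Copies the source segments as-is; no merging, no CFS conversion.
        writer.addIndexes(sources);
        // Optionally let the merge policy merge them with existing segments.
        writer.maybeMerge();
      }
    }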

@@ -2129,7 +2256,7 @@ docCount += info.docCount; String newSegName = newSegmentName(); String dsName = info.getDocStoreSegment(); - + if (infoStream != null) { message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info); } @@ -2176,7 +2303,7 @@ infos.add(info); } - } + } synchronized (this) { ensureOpen(); @@ -2225,11 +2352,12 @@ SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, null, codecs, payloadProcessorProvider, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs))); - + for (IndexReader reader : readers) // add new indexes merger.add(reader); - + int docCount = merger.merge(); // merge 'em + final FieldInfos fieldInfos = merger.fieldInfos(); SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false, fieldInfos.hasProx(), merger.getSegmentCodecs(), @@ -2241,11 +2369,11 @@ synchronized(this) { // Guard segmentInfos useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info); } - + // Now create the compound file if needed if (useCompoundFile) { merger.createCompoundFile(mergedName + ".cfs", info); - + // delete new non cfs files directly: they were never // registered with IFD deleter.deleteNewFiles(info.files()); @@ -2297,7 +2425,7 @@ * #commit()} to finish the commit, or {@link * #rollback()} to revert the commit and undo all changes * done since the writer was opened.
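The two-phase commit described here is typically driven as in the hedged sketch below; the external resource is a placeholder for whatever else participates in the transaction:

    import java.io.IOException;

    import org.apache.lucene.index.IndexWriter;

    final class TwoPhaseCommitSketch {
      interface ExternalResource {
        void prepare() throws IOException;
        void commit() throws IOException;
      }

      static void commitBoth(IndexWriter writer, ExternalResource other) throws IOException {
        try {
          writer.prepareCommit();   // phase one: sync the new segments, hold the commit
          other.prepare();
          writer.commit();          // phase two: publish the prepared commit
          other.commit();
        } catch (IOException e) {
          // Undo all changes since the writer was opened (this also closes it).
          writer.rollback();
          throw e;
        }
      }
    }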

- * + * * You can also just call {@link #commit(Map)} directly * without prepareCommit first in which case that method * will internally call prepareCommit. @@ -2441,6 +2569,10 @@ } } + // Ensures only one flush() is actually flushing segments + // at a time: + private final Object fullFlushLock = new Object(); + /** * Flush all in-memory buffered updates (adds and deletes) * to the Directory. @@ -2464,117 +2596,105 @@ } } - // TODO: this method should not have to be entirely - // synchronized, ie, merges should be allowed to commit - // even while a flush is happening - private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException { - + private boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException { if (hitOOM) { throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush"); } doBeforeFlush(); - assert testPoint("startDoFlush"); - - // We may be flushing because it was triggered by doc - // count, del count, ram usage (in which case flush - // pending is already set), or we may be flushing - // due to external event eg getReader or commit is - // called (in which case we now set it, and this will - // pause all threads): - flushControl.setFlushPendingNoWait("explicit flush"); - boolean success = false; - try { if (infoStream != null) { message(" start flush: applyAllDeletes=" + applyAllDeletes); message(" index before flush " + segString()); } - - final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos); - if (newSegment != null) { - setDiagnostics(newSegment, "flush"); - segmentInfos.add(newSegment); - checkpoint(); - } - - if (!applyAllDeletes) { - // If deletes alone are consuming > 1/2 our RAM - // buffer, force them all to apply now. 
This is to - // prevent too-frequent flushing of a long tail of - // tiny segments: - if (flushControl.getFlushDeletes() || - (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH && - bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) { - applyAllDeletes = true; - if (infoStream != null) { - message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB())); - } + final boolean maybeMerge; + + synchronized (fullFlushLock) { + try { + maybeMerge = docWriter.flushAllThreads(applyAllDeletes); + success = true; + } finally { + docWriter.finishFullFlush(success); } } - - if (applyAllDeletes) { - if (infoStream != null) { - message("apply all deletes during flush"); + success = false; + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + doAfterFlush(); + if (!maybeMerge) { + // flushCount is incremented in flushAllThreads + flushCount.incrementAndGet(); } - flushDeletesCount.incrementAndGet(); - final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos); - if (result.anyDeletes) { - checkpoint(); - } - if (!keepFullyDeletedSegments && result.allDeleted != null) { - if (infoStream != null) { - message("drop 100% deleted segments: " + result.allDeleted); - } - for(SegmentInfo info : result.allDeleted) { - // If a merge has already registered for this - // segment, we leave it in the readerPool; the - // merge will skip merging it and will then drop - // it once it's done: - if (!mergingSegments.contains(info)) { - segmentInfos.remove(info); - if (readerPool != null) { - readerPool.drop(info); - } - } - } - checkpoint(); - } - bufferedDeletesStream.prune(segmentInfos); - assert !bufferedDeletesStream.any(); - flushControl.clearDeletes(); - } else if (infoStream != null) { - message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + success = true; + return maybeMerge; } - - doAfterFlush(); - flushCount.incrementAndGet(); - - success = true; - - return newSegment != null; - } catch (OutOfMemoryError oom) { handleOOM(oom, "doFlush"); // never hit return false; } finally { - flushControl.clearFlushPending(); if (!success && infoStream != null) message("hit exception during flush"); } } + + final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException { + if (applyAllDeletes) { + if (infoStream != null) { + message("apply all deletes during flush"); + } + applyAllDeletes(); + } else if (infoStream != null) { + message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + } + } + + final synchronized void applyAllDeletes() throws IOException { + flushDeletesCount.incrementAndGet(); + final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream + .applyDeletes(readerPool, segmentInfos); + if (result.anyDeletes) { + checkpoint(); + } + if (!keepFullyDeletedSegments && result.allDeleted != null) { + if (infoStream != null) { + message("drop 100% deleted segments: " + result.allDeleted); + } + for (SegmentInfo info : result.allDeleted) { + // If a merge has already registered for this + // segment, we leave it in the readerPool; the + // merge will skip merging it and will then drop + // it once it's done: + if (!mergingSegments.contains(info)) { + segmentInfos.remove(info); + if (readerPool != null) { + readerPool.drop(info); + } 
+ } + } + checkpoint(); + } + bufferedDeletesStream.prune(segmentInfos); + } + /** Expert: Return the total size of all index files currently cached in memory. * Useful for size management with flushRamDocs() */ public final long ramSizeInBytes() { ensureOpen(); - return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed(); + return docWriter.flushControl.netBytes() + bufferedDeletesStream.bytesUsed(); } + + // for testing only + DocumentsWriter getDocsWriter() { + boolean test = false; + assert test = true; + return test?docWriter: null; + } /** Expert: Return the number of documents currently * buffered in RAM. */ @@ -2709,7 +2829,7 @@ } commitMergedDeletes(merge, mergedReader); - + // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound @@ -2723,7 +2843,7 @@ message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert")); } - final Set mergedAway = new HashSet(merge.segments); + final Set mergedAway = new HashSet(merge.segments); int segIdx = 0; int newSegIdx = 0; boolean inserted = false; @@ -2770,15 +2890,15 @@ // them so that they don't bother writing them to // disk, updating SegmentInfo, etc.: readerPool.clear(merge.segments); - + if (merge.optimize) { // cascade the optimize: segmentsToOptimize.add(merge.info); } - + return true; } - + final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException { if (infoStream != null) { @@ -2867,7 +2987,7 @@ /** Hook that's called when the specified merge is complete. */ void mergeSuccess(MergePolicy.OneMerge merge) { } - + /** Checks whether this merge involves any segments * already participating in a merge. If not, this merge * is "registered", meaning we record that its segments @@ -2998,7 +3118,6 @@ // Lock order: IW -> BD bufferedDeletesStream.prune(segmentInfos); - Map details = new HashMap(); details.put("optimize", Boolean.toString(merge.optimize)); details.put("mergeFactor", Integer.toString(merge.segments.size())); @@ -3019,11 +3138,11 @@ mergingSegments.add(merge.info); } - private void setDiagnostics(SegmentInfo info, String source) { + static void setDiagnostics(SegmentInfo info, String source) { setDiagnostics(info, source, null); } - private void setDiagnostics(SegmentInfo info, String source, Map details) { + private static void setDiagnostics(SegmentInfo info, String source, Map details) { Map diagnostics = new HashMap(); diagnostics.put("source", source); diagnostics.put("lucene.version", Constants.LUCENE_VERSION); @@ -3041,7 +3160,7 @@ /** Does fininishing for a merge, which is fast but holds * the synchronized lock on IndexWriter instance. */ final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException { - + // Optimize, addIndexes or finishMerges may be waiting // on merges to finish. 
notifyAll(); @@ -3113,11 +3232,11 @@ * instance */ private int mergeMiddle(MergePolicy.OneMerge merge) throws CorruptIndexException, IOException { - + merge.checkAborted(directory); final String mergedName = merge.info.name; - + int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; @@ -3191,7 +3310,7 @@ message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size()); } anyNonBulkMerges |= merger.getAnyNonBulkMerges(); - + assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount; // Very important to do this before opening the reader @@ -3325,12 +3444,12 @@ // For test purposes. final int getBufferedDeleteTermsSize() { - return docWriter.getPendingDeletes().terms.size(); + return docWriter.getBufferedDeleteTermsSize(); } // For test purposes. final int getNumBufferedDeleteTerms() { - return docWriter.getPendingDeletes().numTermDeletes.get(); + return docWriter.getNumBufferedDeleteTerms(); } // utility routines for tests @@ -3445,17 +3564,17 @@ assert lastCommitChangeCount <= changeCount; myChangeCount = changeCount; - + if (changeCount == lastCommitChangeCount) { if (infoStream != null) message(" skip startCommit(): no changes pending"); return; } - + // First, we clone & incref the segmentInfos we intend // to sync, then, without locking, we sync() all files // referenced by toSync, in the background. - + if (infoStream != null) message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount); @@ -3463,10 +3582,10 @@ toSync = (SegmentInfos) segmentInfos.clone(); assert filesExist(toSync); - + if (commitUserData != null) toSync.setUserData(commitUserData); - + // This protects the segmentInfos we are now going // to commit. This is important in case, eg, while // we are trying to sync all referenced files, a @@ -3598,7 +3717,7 @@ /** Expert: remove any index files that are no longer * used. - * + * *

IndexWriter normally deletes unused files itself, * during indexing. However, on Windows, which disallows * deletion of open files, if there is a reader open on @@ -3647,7 +3766,7 @@ public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) { payloadProcessorProvider = pcp; } - + /** * Returns the {@link PayloadProcessorProvider} that is used during segment * merges to process payloads. @@ -3655,124 +3774,4 @@ public PayloadProcessorProvider getPayloadProcessorProvider() { return payloadProcessorProvider; } - - // decides when flushes happen - final class FlushControl { - - private boolean flushPending; - private boolean flushDeletes; - private int delCount; - private int docCount; - private boolean flushing; - - private synchronized boolean setFlushPending(String reason, boolean doWait) { - if (flushPending || flushing) { - if (doWait) { - while(flushPending || flushing) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - } - return false; - } else { - if (infoStream != null) { - message("now trigger flush reason=" + reason); - } - flushPending = true; - return flushPending; - } - } - - public synchronized void setFlushPendingNoWait(String reason) { - setFlushPending(reason, false); - } - - public synchronized boolean getFlushPending() { - return flushPending; - } - - public synchronized boolean getFlushDeletes() { - return flushDeletes; - } - - public synchronized void clearFlushPending() { - if (infoStream != null) { - message("clearFlushPending"); - } - flushPending = false; - flushDeletes = false; - docCount = 0; - notifyAll(); - } - - public synchronized void clearDeletes() { - delCount = 0; - } - - public synchronized boolean waitUpdate(int docInc, int delInc) { - return waitUpdate(docInc, delInc, false); - } - - public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) { - while(flushPending) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - - // skipWait is only used when a thread is BOTH adding - // a doc and buffering a del term, and, the adding of - // the doc already triggered a flush - if (skipWait) { - docCount += docInc; - delCount += delInc; - return false; - } - - final int maxBufferedDocs = config.getMaxBufferedDocs(); - if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH && - (docCount+docInc) >= maxBufferedDocs) { - return setFlushPending("maxBufferedDocs", true); - } - docCount += docInc; - - final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms(); - if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH && - (delCount+delInc) >= maxBufferedDeleteTerms) { - flushDeletes = true; - return setFlushPending("maxBufferedDeleteTerms", true); - } - delCount += delInc; - - return flushByRAMUsage("add delete/doc"); - } - - public synchronized boolean flushByRAMUsage(String reason) { - final double ramBufferSizeMB = config.getRAMBufferSizeMB(); - if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) { - final long limit = (long) (ramBufferSizeMB*1024*1024); - long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); - if (used >= limit) { - - // DocumentsWriter may be able to free up some - // RAM: - // Lock order: FC -> DW - docWriter.balanceRAM(); - - used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); - if (used >= limit) { - return setFlushPending("ram full: " + reason, false); - } - } - } - return false; - } - } - - final FlushControl flushControl = new 
FlushControl(); } Index: lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java (working copy) @@ -1,25 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -abstract class InvertedDocEndConsumerPerThread { - abstract void startDocument(); - abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - abstract void finishDocument(); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java (working copy) @@ -20,12 +20,13 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; import java.io.IOException; final class TermVectorsWriter { - + private IndexOutput tvx = null, tvd = null, tvf = null; private FieldInfos fieldInfos; @@ -46,7 +47,7 @@ /** * Add a complete document specified by all its term vectors. If document has no * term vectors, add value for tvx. - * + * * @param vectors * @throws IOException */ @@ -99,7 +100,7 @@ final int[] freqs = vectors[i].getTermFrequencies(); for (int j=0; j deletable; - /* Reference count for all files in the index. + /* Reference count for all files in the index. * Counts how many existing commits reference a file. 
**/ private Map refCounts = new HashMap(); @@ -88,7 +94,7 @@ * non-commit checkpoint: */ private List> lastFiles = new ArrayList>(); - /* Commits that the IndexDeletionPolicy have decided to delete: */ + /* Commits that the IndexDeletionPolicy have decided to delete: */ private List commitsToDelete = new ArrayList(); private PrintStream infoStream; @@ -108,7 +114,7 @@ message("setInfoStream deletionPolicy=" + policy); } } - + private void message(String message) { infoStream.println("IFD [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); } @@ -139,12 +145,12 @@ // counts: long currentGen = segmentInfos.getGeneration(); indexFilenameFilter = new IndexFileNameFilter(codecs); - + CommitPoint currentCommitPoint = null; String[] files = null; try { files = directory.listAll(); - } catch (NoSuchDirectoryException e) { + } catch (NoSuchDirectoryException e) { // it means the directory is empty, so ignore it. files = new String[0]; } @@ -152,7 +158,7 @@ for (String fileName : files) { if ((indexFilenameFilter.accept(null, fileName)) && !fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) { - + // Add this file to refCounts with initial count 0: getRefCount(fileName); @@ -233,7 +239,7 @@ // Now delete anything with ref count at 0. These are // presumably abandoned files eg due to crash of // IndexWriter. - for(Map.Entry entry : refCounts.entrySet() ) { + for(Map.Entry entry : refCounts.entrySet() ) { RefCount rc = entry.getValue(); final String fileName = entry.getKey(); if (0 == rc.count) { @@ -253,7 +259,7 @@ // Always protect the incoming segmentInfos since // sometime it may not be the most recent commit checkpoint(segmentInfos, false); - + startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted(); deleteCommits(); @@ -327,7 +333,7 @@ segmentPrefix1 = null; segmentPrefix2 = null; } - + for(int i=0;i oldDeletable = deletable; @@ -397,7 +403,7 @@ /** * For definition of "check point" see IndexWriter comments: * "Clarification: Check Points (and commits)". - * + * * Writer calls this when it has made a "consistent * change" to the index, meaning new files are written to * the index and the in-memory SegmentInfos have been @@ -417,7 +423,7 @@ public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException { if (infoStream != null) { - message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); + message("now checkpoint \"" + segmentInfos + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable Index: lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java (working copy) @@ -1,45 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -final class FreqProxTermsWriterPerThread extends TermsHashConsumerPerThread { - final TermsHashPerThread termsHashPerThread; - final DocumentsWriter.DocState docState; - - public FreqProxTermsWriterPerThread(TermsHashPerThread perThread) { - docState = perThread.docState; - termsHashPerThread = perThread; - } - - @Override - public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { - return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); - } - - @Override - void startDocument() { - } - - @Override - DocumentsWriter.DocWriter finishDocument() { - return null; - } - - @Override - public void abort() {} -} Index: lucene/src/java/org/apache/lucene/index/SegmentWriteState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (working copy) @@ -65,7 +65,7 @@ this.segmentCodecs = segmentCodecs; codecId = ""; } - + /** * Create a shallow {@link SegmentWriteState} copy final a codec ID */ Index: lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java (working copy) @@ -36,7 +36,7 @@ * indexed terms (many pairs of CharSequence text + long * fileOffset), and then this reader must be able to * retrieve the nearest index term to a provided term - * text. + * text. 
* @lucene.experimental */ public abstract class TermsIndexReaderBase implements Closeable { Index: lucene/src/java/org/apache/lucene/index/DocConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocConsumer.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocConsumer.java (working copy) @@ -18,11 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; abstract class DocConsumer { - abstract DocConsumerPerThread addThread(DocumentsWriterThreadState perThread) throws IOException; - abstract void flush(final Collection threads, final SegmentWriteState state) throws IOException; + abstract void processDocument(FieldInfos fieldInfos) throws IOException; + abstract void finishDocument() throws IOException; + abstract void flush(final SegmentWriteState state) throws IOException; abstract void abort(); abstract boolean freeRAM(); + abstract void doAfterFlush(); } Index: lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -abstract class InvertedDocConsumerPerThread { - abstract void startDocument() throws IOException; - abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java (working copy) @@ -1,92 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - -/** This is a DocFieldConsumer that inverts each field, - * separately, from a Document, and accepts a - * InvertedTermsConsumer to process those terms. */ - -final class DocInverterPerThread extends DocFieldConsumerPerThread { - final DocInverter docInverter; - final InvertedDocConsumerPerThread consumer; - final InvertedDocEndConsumerPerThread endConsumer; - final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource(); - - static class SingleTokenAttributeSource extends AttributeSource { - final CharTermAttribute termAttribute; - final OffsetAttribute offsetAttribute; - - private SingleTokenAttributeSource() { - termAttribute = addAttribute(CharTermAttribute.class); - offsetAttribute = addAttribute(OffsetAttribute.class); - } - - public void reinit(String stringValue, int startOffset, int endOffset) { - termAttribute.setEmpty().append(stringValue); - offsetAttribute.setOffset(startOffset, endOffset); - } - } - - final DocumentsWriter.DocState docState; - - final FieldInvertState fieldState = new FieldInvertState(); - - // Used to read a string value for a field - final ReusableStringReader stringReader = new ReusableStringReader(); - - public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter) { - this.docInverter = docInverter; - docState = docFieldProcessorPerThread.docState; - consumer = docInverter.consumer.addThread(this); - endConsumer = docInverter.endConsumer.addThread(this); - } - - @Override - public void startDocument() throws IOException { - consumer.startDocument(); - endConsumer.startDocument(); - } - - @Override - public DocumentsWriter.DocWriter finishDocument() throws IOException { - // TODO: allow endConsumer.finishDocument to also return - // a DocWriter - endConsumer.finishDocument(); - return consumer.finishDocument(); - } - - @Override - void abort() { - try { - consumer.abort(); - } finally { - endConsumer.abort(); - } - } - - @Override - public DocFieldConsumerPerField addField(FieldInfo fi) { - return new DocInverterPerField(this, fi); - } -} Index: lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java (working copy) @@ -1,89 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.BytesRef; - -final class TermVectorsTermsWriterPerThread extends TermsHashConsumerPerThread { - - final TermVectorsTermsWriter termsWriter; - final TermsHashPerThread termsHashPerThread; - final DocumentsWriter.DocState docState; - final BytesRef flushTerm = new BytesRef(); - - TermVectorsTermsWriter.PerDoc doc; - - public TermVectorsTermsWriterPerThread(TermsHashPerThread termsHashPerThread, TermVectorsTermsWriter termsWriter) { - this.termsWriter = termsWriter; - this.termsHashPerThread = termsHashPerThread; - docState = termsHashPerThread.docState; - } - - // Used by perField when serializing the term vectors - final ByteSliceReader vectorSliceReader = new ByteSliceReader(); - - @Override - public void startDocument() { - assert clearLastVectorFieldName(); - if (doc != null) { - doc.reset(); - doc.docID = docState.docID; - } - } - - @Override - public DocumentsWriter.DocWriter finishDocument() { - try { - return doc; - } finally { - doc = null; - } - } - - @Override - public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { - return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo); - } - - @Override - public void abort() { - if (doc != null) { - doc.abort(); - doc = null; - } - } - - // Called only by assert - final boolean clearLastVectorFieldName() { - lastVectorFieldName = null; - return true; - } - - // Called only by assert - String lastVectorFieldName; - final boolean vectorFieldsInOrder(FieldInfo fi) { - try { - if (lastVectorFieldName != null) - return lastVectorFieldName.compareTo(fi.name) < 0; - else - return true; - } finally { - lastVectorFieldName = fi.name; - } - } -} Index: lucene/src/java/org/apache/lucene/index/DocInverter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverter.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/DocInverter.java (working copy) @@ -18,13 +18,14 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; - import java.util.Map; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util.AttributeSource; + /** This is a DocFieldConsumer that inverts each field, * separately, from a Document, and accepts a * InvertedTermsConsumer to process those terms. 
*/ @@ -34,42 +35,72 @@ final InvertedDocConsumer consumer; final InvertedDocEndConsumer endConsumer; - public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) { + final DocumentsWriterPerThread.DocState docState; + + final FieldInvertState fieldState = new FieldInvertState(); + + final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource(); + + static class SingleTokenAttributeSource extends AttributeSource { + final CharTermAttribute termAttribute; + final OffsetAttribute offsetAttribute; + + private SingleTokenAttributeSource() { + termAttribute = addAttribute(CharTermAttribute.class); + offsetAttribute = addAttribute(OffsetAttribute.class); + } + + public void reinit(String stringValue, int startOffset, int endOffset) { + termAttribute.setEmpty().append(stringValue); + offsetAttribute.setOffset(startOffset, endOffset); + } + } + + // Used to read a string value for a field + final ReusableStringReader stringReader = new ReusableStringReader(); + + public DocInverter(DocumentsWriterPerThread.DocState docState, InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) { + this.docState = docState; this.consumer = consumer; this.endConsumer = endConsumer; } @Override - void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException { + void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - Map> endChildThreadsAndFields = new HashMap>(); + Map childFieldsToFlush = new HashMap(); + Map endChildFieldsToFlush = new HashMap(); - for (Map.Entry> entry : threadsAndFields.entrySet() ) { + for (Map.Entry fieldToFlush : fieldsToFlush.entrySet()) { + DocInverterPerField perField = (DocInverterPerField) fieldToFlush.getValue(); + childFieldsToFlush.put(fieldToFlush.getKey(), perField.consumer); + endChildFieldsToFlush.put(fieldToFlush.getKey(), perField.endConsumer); + } + consumer.flush(childFieldsToFlush, state); + endConsumer.flush(endChildFieldsToFlush, state); + } - DocInverterPerThread perThread = (DocInverterPerThread) entry.getKey(); + @Override + public void startDocument() throws IOException { + consumer.startDocument(); + endConsumer.startDocument(); + } - Collection childFields = new HashSet(); - Collection endChildFields = new HashSet(); - for (final DocFieldConsumerPerField field: entry.getValue() ) { - DocInverterPerField perField = (DocInverterPerField) field; - childFields.add(perField.consumer); - endChildFields.add(perField.endConsumer); - } - - childThreadsAndFields.put(perThread.consumer, childFields); - endChildThreadsAndFields.put(perThread.endConsumer, endChildFields); - } - - consumer.flush(childThreadsAndFields, state); - endConsumer.flush(endChildThreadsAndFields, state); + public void finishDocument() throws IOException { + // TODO: allow endConsumer.finishDocument to also return + // a DocWriter + endConsumer.finishDocument(); + consumer.finishDocument(); } @Override void abort() { - consumer.abort(); - endConsumer.abort(); + try { + consumer.abort(); + } finally { + endConsumer.abort(); + } } @Override @@ -78,7 +109,8 @@ } @Override - public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) { - return new DocInverterPerThread(docFieldProcessorPerThread, this); + public DocFieldConsumerPerField addField(FieldInfo fi) { + return new DocInverterPerField(this, fi); } + } Index: lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java 
=================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (working copy) @@ -18,7 +18,7 @@ */ import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.DocumentsWriter.IndexingChain; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.search.IndexSearcher; @@ -41,7 +41,7 @@ * IndexWriterConfig conf = new IndexWriterConfig(analyzer); * conf.setter1().setter2(); * - * + * * @since 3.1 */ public final class IndexWriterConfig implements Cloneable { @@ -56,7 +56,7 @@ * */ public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND } - + /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */ public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here @@ -77,7 +77,7 @@ /** * Default value for the write lock timeout (1,000 ms). - * + * * @see #setDefaultWriteLockTimeout(long) */ public static long WRITE_LOCK_TIMEOUT = 1000; @@ -94,6 +94,8 @@ /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */ public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR; + /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */ + public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945; /** * Sets the default (for any instance) maximum time to wait for a write lock * (in milliseconds). @@ -105,7 +107,7 @@ /** * Returns the default write lock timeout for newly instantiated * IndexWriterConfigs. - * + * * @see #setDefaultWriteLockTimeout(long) */ public static long getDefaultWriteLockTimeout() { @@ -127,10 +129,12 @@ private volatile IndexReaderWarmer mergedSegmentWarmer; private volatile CodecProvider codecProvider; private volatile MergePolicy mergePolicy; - private volatile int maxThreadStates; + private volatile DocumentsWriterPerThreadPool indexerThreadPool; private volatile boolean readerPooling; private volatile int readerTermsIndexDivisor; - + private volatile FlushPolicy flushPolicy; + private volatile int perThreadHardLimitMB; + private Version matchVersion; /** @@ -153,15 +157,16 @@ maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB; maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS; - indexingChain = DocumentsWriter.defaultIndexingChain; + indexingChain = DocumentsWriterPerThread.defaultIndexingChain; mergedSegmentWarmer = null; codecProvider = CodecProvider.getDefault(); mergePolicy = new TieredMergePolicy(); - maxThreadStates = DEFAULT_MAX_THREAD_STATES; readerPooling = DEFAULT_READER_POOLING; + indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(DEFAULT_MAX_THREAD_STATES); readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR; + perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; } - + @Override public Object clone() { // Shallow clone is the only thing that's possible, since parameters like @@ -186,7 +191,7 @@ this.openMode = openMode; return this; } - + /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. 
*/ public OpenMode getOpenMode() { return openMode; @@ -261,7 +266,7 @@ public SimilarityProvider getSimilarityProvider() { return similarityProvider; } - + /** * Expert: set the interval between indexed terms. Large values cause less * memory to be used by IndexReader, but slow random-access to terms. Small @@ -281,7 +286,7 @@ * In particular, numUniqueTerms/interval terms are read into * memory by an IndexReader, and, on average, interval/2 terms * must be scanned for each random term access. - * + * * @see #DEFAULT_TERM_INDEX_INTERVAL * *

Takes effect immediately, but only applies to newly @@ -293,7 +298,7 @@ /** * Returns the interval between indexed terms. - * + * * @see #setTermIndexInterval(int) */ public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here @@ -331,10 +336,10 @@ this.writeLockTimeout = writeLockTimeout; return this; } - + /** * Returns allowed timeout when acquiring the write lock. - * + * * @see #setWriteLockTimeout(long) */ public long getWriteLockTimeout() { @@ -343,15 +348,16 @@ /** * Determines the minimal number of delete terms required before the buffered - * in-memory delete terms are applied and flushed. If there are documents - * buffered in memory at the time, they are merged and a new segment is - * created. - - *

Disabled by default (writer flushes by RAM usage). + * in-memory delete terms and queries are applied and flushed. + *

Disabled by default (writer flushes by RAM usage).

+ *

+ * NOTE: This setting won't trigger a segment flush. + *

* * @throws IllegalArgumentException if maxBufferedDeleteTerms * is enabled but smaller than 1 * @see #setRAMBufferSizeMB + * @see #setFlushPolicy(FlushPolicy) * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. @@ -366,9 +372,9 @@ } /** - * Returns the number of buffered deleted terms that will trigger a flush if - * enabled. - * + * Returns the number of buffered deleted terms that will trigger a flush of all + * buffered deletes if enabled. + * * @see #setMaxBufferedDeleteTerms(int) */ public int getMaxBufferedDeleteTerms() { @@ -380,45 +386,50 @@ * and deletions before they are flushed to the Directory. Generally for * faster indexing performance it's best to flush by RAM usage instead of * document count and use as large a RAM buffer as you can. - * *

* When this is set, the writer will flush whenever buffered documents and * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent * triggering a flush due to RAM usage. Note that if flushing by document * count is also enabled, then the flush will be triggered by whichever comes * first. - * *

+ * The maximum RAM limit is inherently determined by the JVM's available memory. + * Yet, an {@link IndexWriter} session can consume a significantly larger amount + * of memory than the given RAM limit since this limit is just an indicator when + * to flush memory resident documents to the Directory. Flushes are likely to happen + * concurrently while other threads are adding documents to the writer. For application + * stability, the available memory in the JVM should be significantly larger than + * the RAM buffer used for indexing. + *

* NOTE: the account of RAM usage for pending deletions is only * approximate. Specifically, if you delete by Query, Lucene currently has no * way to measure the RAM usage of individual Queries so the accounting will * under-estimate and you should compensate by either calling commit() * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)} - * to flush by count instead of RAM usage (each buffered delete Query counts - * as one). - * + * to flush and apply buffered deletes by count instead of RAM usage + * (for each buffered delete Query a constant number of bytes is used to estimate + * RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will + * not trigger any segment flushes. *

- * NOTE: because IndexWriter uses ints when managing its - * internal storage, the absolute maximum value for this setting is somewhat - * less than 2048 MB. The precise limit depends on various factors, such as - * how large your documents are, how many fields have norms, etc., so it's - * best to set this value comfortably under 2048. + * NOTE: It's not guaranteed that all memory resident documents are flushed + * once this limit is exceeded. Depending on the configured {@link FlushPolicy}, only a + * subset of the buffered documents is flushed and therefore only part of the RAM + * buffer is released. + *

* - *

* The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. - * + * @see #setFlushPolicy(FlushPolicy) + * @see #setRAMPerThreadHardLimitMB(int) + * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. * * @throws IllegalArgumentException * if ramBufferSize is enabled but non-positive, or it disables * ramBufferSize when maxBufferedDocs is already disabled + * */ public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { - if (ramBufferSizeMB > 2048.0) { - throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB - + " is too large; should be comfortably less than 2048"); - } if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) throw new IllegalArgumentException( "ramBufferSize should be > 0.0 MB when enabled"); @@ -438,22 +449,22 @@ * Determines the minimal number of documents required before the buffered * in-memory documents are flushed as a new Segment. Large values generally * give faster indexing. - * + * *

* When this is set, the writer will flush every maxBufferedDocs added * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a * flush due to number of buffered documents. Note that if flushing by RAM * usage is also enabled, then the flush will be triggered by whichever comes * first. - * + * *

* Disabled by default (writer flushes by RAM usage). - * + * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. * * @see #setRAMBufferSizeMB(double) - * + * @see #setFlushPolicy(FlushPolicy) * @throws IllegalArgumentException * if maxBufferedDocs is enabled but smaller than 2, or it disables * maxBufferedDocs when ramBufferSize is already disabled @@ -473,7 +484,7 @@ /** * Returns the number of buffered added documents that will trigger a flush if * enabled. - * + * * @see #setMaxBufferedDocs(int) */ public int getMaxBufferedDocs() { @@ -519,32 +530,55 @@ return codecProvider; } - + /** * Returns the current MergePolicy in use by this writer. - * + * * @see #setMergePolicy(MergePolicy) */ public MergePolicy getMergePolicy() { return mergePolicy; } - /** - * Sets the max number of simultaneous threads that may be indexing documents - * at once in IndexWriter. Values < 1 are invalid and if passed - * maxThreadStates will be set to - * {@link #DEFAULT_MAX_THREAD_STATES}. - * - *

Only takes effect when IndexWriter is first created. */ - public IndexWriterConfig setMaxThreadStates(int maxThreadStates) { - this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates; + /** Expert: Sets the {@link DocumentsWriterPerThreadPool} instance used by the + * IndexWriter to assign thread-states to incoming indexing threads. If no + * {@link DocumentsWriterPerThreadPool} is set, {@link IndexWriter} will use + * {@link ThreadAffinityDocumentsWriterThreadPool} with the max number of + * thread-states set to {@value #DEFAULT_MAX_THREAD_STATES} (see + * {@link #DEFAULT_MAX_THREAD_STATES}). + *

+ *

+ * NOTE: The given {@link DocumentsWriterPerThreadPool} instance must not be used with + * other {@link IndexWriter} instances once it has been initialized / associated with an + * {@link IndexWriter}. + *

+ *

+ * NOTE: This only takes effect when IndexWriter is first created.

*/ + public IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) { + if (threadPool == null) { + throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be null"); + } + this.indexerThreadPool = threadPool; return this; } - /** Returns the max number of simultaneous threads that - * may be indexing documents at once in IndexWriter. */ + /** Returns the configured {@link DocumentsWriterPerThreadPool} instance. + * @see #setIndexerThreadPool(DocumentsWriterPerThreadPool) + * @return the configured {@link DocumentsWriterPerThreadPool} instance.*/ + public DocumentsWriterPerThreadPool getIndexerThreadPool() { + return this.indexerThreadPool; + } + + /** Returns the max number of simultaneous threads that may be indexing + * documents at once in IndexWriter. + *

+ * To modify the max number of thread-states a new + * {@link DocumentsWriterPerThreadPool} must be set via + * {@link #setIndexerThreadPool(DocumentsWriterPerThreadPool)}. + *

+ * @see #setIndexerThreadPool(DocumentsWriterPerThreadPool) */ public int getMaxThreadStates() { - return maxThreadStates; + return indexerThreadPool.getMaxThreadStates(); } /** By default, IndexWriter does not pool the @@ -572,10 +606,10 @@ * *

Only takes effect when IndexWriter is first created. */ IndexWriterConfig setIndexingChain(IndexingChain indexingChain) { - this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain; + this.indexingChain = indexingChain == null ? DocumentsWriterPerThread.defaultIndexingChain : indexingChain; return this; } - + /** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */ IndexingChain getIndexingChain() { return indexingChain; } @@ -604,6 +638,53 @@ return readerTermsIndexDivisor; } + /** + * Expert: Controls when segments are flushed to disk during indexing. + * The {@link FlushPolicy} is initialized during {@link IndexWriter} instantiation and, once initialized, + * the given instance is bound to this {@link IndexWriter} and should not be used with another writer. + * @see #setMaxBufferedDeleteTerms(int) + * @see #setMaxBufferedDocs(int) + * @see #setRAMBufferSizeMB(double) + */ + public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) { + this.flushPolicy = flushPolicy; + return this; + } + + /** + * Expert: Sets the maximum memory consumption per thread triggering a forced + * flush if exceeded. A {@link DocumentsWriterPerThread} is forcefully flushed + * once it exceeds this limit even if the {@link #getRAMBufferSizeMB()} has + * not been exceeded. This is a safety limit to prevent a + * {@link DocumentsWriterPerThread} from address space exhaustion due to its + * internal 32 bit signed integer based memory addressing. + * The given value must be less than 2GB (2048MB). + * + * @see #DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB + */ + public IndexWriterConfig setRAMPerThreadHardLimitMB(int perThreadHardLimitMB) { + if (perThreadHardLimitMB <= 0 || perThreadHardLimitMB >= 2048) { + throw new IllegalArgumentException("PerThreadHardLimit must be greater than 0 and less than 2048MB"); + } + this.perThreadHardLimitMB = perThreadHardLimitMB; + return this; + } + + /** + * Returns the max amount of memory each {@link DocumentsWriterPerThread} can + * consume until forcefully flushed.
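[Illustration only, not part of the patch] A minimal sketch of how the new setIndexerThreadPool and setRAMPerThreadHardLimitMB setters introduced above might be wired together. The analyzer and directory variables, the Version constant, and the concrete numbers (64 MB, 512 MB, 8 thread-states) are placeholder assumptions, not values taken from the patch.

    // Sketch, assuming the API added by this patch; imports of org.apache.lucene.index.*,
    // org.apache.lucene.analysis.Analyzer, org.apache.lucene.store.Directory and
    // org.apache.lucene.util.Version omitted; analyzer and directory assumed to exist.
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    conf.setRAMBufferSizeMB(64.0)                    // global flush trigger across all DWPTs
        .setRAMPerThreadHardLimitMB(512)             // per-DWPT hard ceiling, must be < 2048 MB
        .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(8)); // example: 8 thread-states
    IndexWriter writer = new IndexWriter(directory, conf);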
+ * @see #setRAMPerThreadHardLimitMB(int) + */ + public int getRAMPerThreadHardLimitMB() { + return perThreadHardLimitMB; + } + /** + * @see #setFlushPolicy(FlushPolicy) + */ + public FlushPolicy getFlushPolicy() { + return flushPolicy; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -623,9 +704,14 @@ sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n"); sb.append("codecProvider=").append(codecProvider).append("\n"); sb.append("mergePolicy=").append(mergePolicy).append("\n"); - sb.append("maxThreadStates=").append(maxThreadStates).append("\n"); + sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n"); + sb.append("maxThreadStates=").append(indexerThreadPool.getMaxThreadStates()).append("\n"); sb.append("readerPooling=").append(readerPooling).append("\n"); sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n"); + sb.append("flushPolicy=").append(flushPolicy).append("\n"); + sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n"); + return sb.toString(); } + } Index: lucene/src/java/org/apache/lucene/index/NormsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/NormsWriter.java (revision 1097441) +++ lucene/src/java/org/apache/lucene/index/NormsWriter.java (working copy) @@ -19,11 +19,7 @@ import java.io.IOException; import java.util.Collection; -import java.util.Iterator; -import java.util.HashMap; import java.util.Map; -import java.util.List; -import java.util.ArrayList; import org.apache.lucene.store.IndexOutput; @@ -36,10 +32,6 @@ final class NormsWriter extends InvertedDocEndConsumer { - @Override - public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { - return new NormsWriterPerThread(docInverterPerThread, this); - } @Override public void abort() {} @@ -50,40 +42,11 @@ /** Produce _X.nrm if any document had a field with norms * not disabled */ @Override - public void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException { - - final Map> byField = new HashMap>(); - + public void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { if (!state.fieldInfos.hasNorms()) { return; } - // Typically, each thread will have encountered the same - // field. 
So first we collate by field, ie, all - // per-thread field instances that correspond to the - // same FieldInfo - for (final Map.Entry> entry : threadsAndFields.entrySet()) { - final Collection fields = entry.getValue(); - final Iterator fieldsIt = fields.iterator(); - - while (fieldsIt.hasNext()) { - final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next(); - - if (perField.upto > 0) { - // It has some norms - List l = byField.get(perField.fieldInfo); - if (l == null) { - l = new ArrayList(); - byField.put(perField.fieldInfo, l); - } - l.add(perField); - } else - // Remove this field since we haven't seen it - // since the previous flush - fieldsIt.remove(); - } - } - final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION); IndexOutput normsOut = state.directory.createOutput(normsFileName); @@ -93,60 +56,25 @@ int normCount = 0; for (FieldInfo fi : state.fieldInfos) { - final List toMerge = byField.get(fi); + final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi); int upto = 0; - if (toMerge != null) { - - final int numFields = toMerge.size(); - + if (toWrite != null && toWrite.upto > 0) { normCount++; - final NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; - int[] uptos = new int[numFields]; - - for(int j=0;j 0) { - - assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length); - - int minLoc = 0; - int minDocID = fields[0].docIDs[uptos[0]]; - - for(int j=1;j files) throws IOException { - final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecId, SEED_EXT); + final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecId, SEED_EXT); files.add(seedFileName); SepPostingsReaderImpl.files(segmentInfo, codecId, files); StandardPostingsReader.files(dir, segmentInfo, codecId, files); BlockTermsReader.files(dir, segmentInfo, codecId, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); - + // hackish! Iterator it = files.iterator(); while(it.hasNext()) { Index: lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (revision 1097441) +++ lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (working copy) @@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ThrottledIndexOutput; import org.apache.lucene.util._TestUtil; /** @@ -69,6 +70,7 @@ private Set createdFiles; Set openFilesForWrite = new HashSet(); volatile boolean crashed; + private ThrottledIndexOutput throttledOutput; // use this for tracking files for crash. 
// additionally: provides debugging information in case you leave one open @@ -114,6 +116,10 @@ public void setPreventDoubleWrite(boolean value) { preventDoubleWrite = value; } + + public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) { + this.throttledOutput = throttledOutput; + } @Override public synchronized void sync(Collection names) throws IOException { @@ -348,7 +354,7 @@ IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name); openFileHandles.put(io, new RuntimeException("unclosed IndexOutput")); openFilesForWrite.add(name); - return io; + return throttledOutput == null ? io : throttledOutput.newFromDelegate(io); } @Override @@ -578,4 +584,5 @@ maybeYield(); delegate.copy(to, src, dest); } + } Index: lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java (revision 0) +++ lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java (revision 0) @@ -0,0 +1,147 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.IndexOutput; + +public class ThrottledIndexOutput extends IndexOutput { + public static final int DEFAULT_MIN_WRITTEN_BYTES = 1024; + private final int bytesPerSecond; + private IndexOutput delegate; + private long flushDelayMillis; + private long closeDelayMillis; + private long seekDelayMillis; + private long pendingBytes; + private long minBytesWritten; + private long timeElapsed; + private final byte[] bytes = new byte[1]; + + public ThrottledIndexOutput newFromDelegate(IndexOutput output) { + return new ThrottledIndexOutput(bytesPerSecond, flushDelayMillis, + closeDelayMillis, seekDelayMillis, minBytesWritten, output); + } + + public ThrottledIndexOutput(int bytesPerSecond, long delayInMillis, + IndexOutput delegate) { + this(bytesPerSecond, delayInMillis, delayInMillis, delayInMillis, + DEFAULT_MIN_WRITTEN_BYTES, delegate); + } + + public ThrottledIndexOutput(int bytesPerSecond, long delays, + int minBytesWritten, IndexOutput delegate) { + this(bytesPerSecond, delays, delays, delays, minBytesWritten, delegate); + } + + public static final int mBitsToBytes(int mbits) { + return mbits * 125000; + } + + public ThrottledIndexOutput(int bytesPerSecond, long flushDelayMillis, + long closeDelayMillis, long seekDelayMillis, long minBytesWritten, + IndexOutput delegate) { + assert bytesPerSecond > 0; + this.delegate = delegate; + this.bytesPerSecond = bytesPerSecond; + this.flushDelayMillis = flushDelayMillis; + this.closeDelayMillis = closeDelayMillis; + this.seekDelayMillis = seekDelayMillis; + this.minBytesWritten = minBytesWritten; + } + + @Override + public void flush() throws IOException { + sleep(flushDelayMillis); + delegate.flush(); + } + + @Override + public void close() throws IOException { + sleep(closeDelayMillis + getDelay(true)); + delegate.close(); + + } + + @Override + public long getFilePointer() { + return delegate.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + sleep(seekDelayMillis); + delegate.seek(pos); + } + + @Override + public long length() throws IOException { + return delegate.length(); + } + + @Override + public void writeByte(byte b) throws IOException { + bytes[0] = b; + writeBytes(bytes, 0, 1); + } + + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + final long before = System.nanoTime(); + delegate.writeBytes(b, offset, length); + timeElapsed += System.nanoTime() - before; + pendingBytes += length; + sleep(getDelay(false)); + + } + + protected long getDelay(boolean closing) { + if (pendingBytes > 0 && (closing || pendingBytes > minBytesWritten)) { + long actualBps = (timeElapsed / pendingBytes) * 1000000000l; // nano to sec + if (actualBps > bytesPerSecond) { + long expected = (pendingBytes * 1000l / bytesPerSecond) ; + final long delay = expected - (timeElapsed / 1000000l) ; + pendingBytes = 0; + timeElapsed = 0; + return delay; + } + } + return 0; + + } + + private static final void sleep(long ms) { + if (ms <= 0) + return; + try { + Thread.sleep(ms); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } + } + + @Override + public void setLength(long length) throws IOException { + delegate.setLength(length); + } + + @Override + public void copyBytes(DataInput input, long numBytes) throws IOException { + delegate.copyBytes(input, numBytes); + } +} Property changes on: 
lucene\src\test-framework\org\apache\lucene\util\ThrottledIndexOutput.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1097441) +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -116,7 +116,7 @@ * If this is set, it is the only method that should run. */ static final String TEST_METHOD; - + /** Create indexes in this directory, optimally use a subdir, named after the test */ public static final File TEMP_DIR; static { @@ -163,11 +163,11 @@ * multiply it by the number of iterations */ public static final int RANDOM_MULTIPLIER = Integer.parseInt(System.getProperty("tests.multiplier", "1")); - + private int savedBoolMaxClauseCount; private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null; - + /** Used to track if setUp and tearDown are called correctly from subclasses */ private boolean setup; @@ -189,28 +189,28 @@ private static class UncaughtExceptionEntry { public final Thread thread; public final Throwable exception; - + public UncaughtExceptionEntry(Thread thread, Throwable exception) { this.thread = thread; this.exception = exception; } } private List uncaughtExceptions = Collections.synchronizedList(new ArrayList()); - + // saves default codec: we do this statically as many build indexes in @beforeClass private static String savedDefaultCodec; // default codec: not set when we use a per-field provider. private static Codec codec; // default codec provider private static CodecProvider savedCodecProvider; - + private static Locale locale; private static Locale savedLocale; private static TimeZone timeZone; private static TimeZone savedTimeZone; - + private static Map stores; - + private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"}; private static void swapCodec(Codec c, CodecProvider cp) { @@ -288,7 +288,7 @@ // randomly picks from core and test codecs static String pickRandomCodec(Random rnd) { - int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + + int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + TEST_CODECS.length); if (idx < CodecProvider.CORE_CODECS.length) { return CodecProvider.CORE_CODECS[idx]; @@ -321,7 +321,7 @@ /** @deprecated (4.0) until we fix no-fork problems in solr tests */ @Deprecated private static List testClassesRun = new ArrayList(); - + @BeforeClass public static void beforeClassLuceneTestCaseJ4() { staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1; @@ -347,7 +347,7 @@ TimeZone.setDefault(timeZone); testsFailed = false; } - + @AfterClass public static void afterClassLuceneTestCaseJ4() { if (! 
"false".equals(TEST_CLEAN_THREADS)) { @@ -363,12 +363,12 @@ if ("randomPerField".equals(TEST_CODEC)) { if (cp instanceof RandomCodecProvider) codecDescription = cp.toString(); - else + else codecDescription = "PreFlex"; } else { codecDescription = codec.toString(); } - + if (CodecProvider.getDefault() == savedCodecProvider) removeTestCodecs(codec, CodecProvider.getDefault()); CodecProvider.setDefault(savedCodecProvider); @@ -398,14 +398,14 @@ stores = null; // if verbose or tests failed, report some information back if (VERBOSE || testsFailed) - System.err.println("NOTE: test params are: codec=" + codecDescription + - ", locale=" + locale + + System.err.println("NOTE: test params are: codec=" + codecDescription + + ", locale=" + locale + ", timezone=" + (timeZone == null ? "(null)" : timeZone.getID())); if (testsFailed) { System.err.println("NOTE: all tests run in this JVM:"); System.err.println(Arrays.toString(testClassesRun.toArray())); - System.err.println("NOTE: " + System.getProperty("os.name") + " " - + System.getProperty("os.version") + " " + System.err.println("NOTE: " + System.getProperty("os.name") + " " + + System.getProperty("os.version") + " " + System.getProperty("os.arch") + "/" + System.getProperty("java.vendor") + " " + System.getProperty("java.version") + " " @@ -428,7 +428,7 @@ } private static boolean testsFailed; /* true if any tests failed */ - + // This is how we get control when errors occur. // Think of this as start/end/success/failed // events. @@ -463,7 +463,7 @@ LuceneTestCase.this.name = method.getName(); super.starting(method); } - + }; @Before @@ -481,7 +481,7 @@ savedUncaughtExceptionHandler.uncaughtException(t, e); } }); - + savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount(); } @@ -513,7 +513,7 @@ if ("perMethod".equals(TEST_CLEAN_THREADS)) { int rogueThreads = threadCleanup("test method: '" + getName() + "'"); if (rogueThreads > 0) { - System.err.println("RESOURCE LEAK: test method: '" + getName() + System.err.println("RESOURCE LEAK: test method: '" + getName() + "' left " + rogueThreads + " thread(s) running"); // TODO: fail, but print seed for now. if (!testsFailed && uncaughtExceptions.isEmpty()) { @@ -535,18 +535,18 @@ fail("Some threads threw uncaught exceptions!"); } - // calling assertSaneFieldCaches here isn't as useful as having test - // classes call it directly from the scope where the index readers - // are used, because they could be gc'ed just before this tearDown + // calling assertSaneFieldCaches here isn't as useful as having test + // classes call it directly from the scope where the index readers + // are used, because they could be gc'ed just before this tearDown // method is called. // // But it's better then nothing. // - // If you are testing functionality that you know for a fact - // "violates" FieldCache sanity, then you should either explicitly + // If you are testing functionality that you know for a fact + // "violates" FieldCache sanity, then you should either explicitly // call purgeFieldCache at the end of your test method, or refactor - // your Test class so that the inconsistant FieldCache usages are - // isolated in distinct test methods + // your Test class so that the inconsistant FieldCache usages are + // isolated in distinct test methods assertSaneFieldCaches(getTestLabel()); } finally { @@ -557,14 +557,14 @@ private final static int THREAD_STOP_GRACE_MSEC = 50; // jvm-wide list of 'rogue threads' we found, so they only get reported once. 
private final static IdentityHashMap rogueThreads = new IdentityHashMap(); - + static { // just a hack for things like eclipse test-runner threads for (Thread t : Thread.getAllStackTraces().keySet()) { rogueThreads.put(t, true); } } - + /** * Looks for leftover running threads, trying to kill them off, * so they don't fail future tests. @@ -575,20 +575,20 @@ Thread[] stillRunning = new Thread[Thread.activeCount()+1]; int threadCount = 0; int rogueCount = 0; - + if ((threadCount = Thread.enumerate(stillRunning)) > 1) { while (threadCount == stillRunning.length) { // truncated response stillRunning = new Thread[stillRunning.length*2]; threadCount = Thread.enumerate(stillRunning); } - + for (int i = 0; i < threadCount; i++) { Thread t = stillRunning[i]; - - if (t.isAlive() && - !rogueThreads.containsKey(t) && - t != Thread.currentThread() && + + if (t.isAlive() && + !rogueThreads.containsKey(t) && + t != Thread.currentThread() && /* its ok to keep your searcher across test cases */ (t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) { System.err.println("WARNING: " + context + " left thread running: " + t); @@ -613,7 +613,7 @@ } return rogueCount; } - + /** * Asserts that FieldCacheSanityChecker does not detect any * problems with FieldCache.DEFAULT. @@ -656,13 +656,13 @@ } } - + // @deprecated (4.0) These deprecated methods should be removed soon, when all tests using no Epsilon are fixed: @Deprecated static public void assertEquals(double expected, double actual) { assertEquals(null, expected, actual); } - + @Deprecated static public void assertEquals(String message, double expected, double actual) { assertEquals(message, Double.valueOf(expected), Double.valueOf(actual)); @@ -677,18 +677,18 @@ static public void assertEquals(String message, float expected, float actual) { assertEquals(message, Float.valueOf(expected), Float.valueOf(actual)); } - + // Replacement for Assume jUnit class, so we can add a message with explanation: - + private static final class TestIgnoredException extends RuntimeException { TestIgnoredException(String msg) { super(msg); } - + TestIgnoredException(String msg, Throwable t) { super(msg, t); } - + @Override public String getMessage() { StringBuilder sb = new StringBuilder(super.getMessage()); @@ -696,7 +696,7 @@ sb.append(" - ").append(getCause()); return sb.toString(); } - + // only this one is called by our code, exception is not used outside this class: @Override public void printStackTrace(PrintStream s) { @@ -708,19 +708,19 @@ } } } - + public static void assumeTrue(String msg, boolean b) { Assume.assumeNoException(b ? null : new TestIgnoredException(msg)); } - + public static void assumeFalse(String msg, boolean b) { assumeTrue(msg, !b); } - + public static void assumeNoException(String msg, Exception e) { Assume.assumeNoException(e == null ? null : new TestIgnoredException(msg, e)); } - + public static Set asSet(T... args) { return new HashSet(Arrays.asList(args)); } @@ -778,7 +778,7 @@ c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000)); } if (r.nextBoolean()) { - c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20)); + c.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(_TestUtil.nextInt(r, 1, 20))); } if (r.nextBoolean()) { @@ -864,7 +864,7 @@ public static MockDirectoryWrapper newDirectory() throws IOException { return newDirectory(random); } - + /** * Returns a new Directory instance, using the specified random. * See {@link #newDirectory()} for more information. 
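[Illustration only, not part of the patch] Referring back to the MockDirectoryWrapper.setThrottledIndexOutput hook and the new ThrottledIndexOutput class earlier in this patch, a hedged sketch of how a test based on LuceneTestCase might slow down all writes to a test directory; the 40 mbit/s rate and 10 ms delay are arbitrary example values, not defaults defined by the patch.

    // Sketch only: every IndexOutput created by dir is wrapped via newFromDelegate().
    MockDirectoryWrapper dir = newDirectory();
    dir.setThrottledIndexOutput(new ThrottledIndexOutput(
        ThrottledIndexOutput.mBitsToBytes(40), // ~40 mbit/s expressed as bytes per second
        10,                                    // flush/close/seek delay in milliseconds
        null));                                // prototype only; the real delegate is supplied per file
    // ... indexing through dir now simulates a slow device ...
    dir.close();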
@@ -875,7 +875,7 @@ stores.put(dir, Thread.currentThread().getStackTrace()); return dir; } - + /** * Returns a new Directory instance, with contents copied from the * provided directory. See {@link #newDirectory()} for more @@ -884,23 +884,23 @@ public static MockDirectoryWrapper newDirectory(Directory d) throws IOException { return newDirectory(random, d); } - + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ public static MockDirectoryWrapper newFSDirectory(File f) throws IOException { return newFSDirectory(f, null); } - + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException { String fsdirClass = TEST_DIRECTORY; if (fsdirClass.equals("random")) { fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; } - + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store fsdirClass = "org.apache.lucene.store." + fsdirClass; } - + Class clazz; try { try { @@ -908,11 +908,11 @@ } catch (ClassCastException e) { // TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; - + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store fsdirClass = "org.apache.lucene.store." + fsdirClass; } - + clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class); } MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f, lf)); @@ -922,7 +922,7 @@ throw new RuntimeException(e); } } - + /** * Returns a new Directory instance, using the specified random * with contents copied from the provided directory. See @@ -980,44 +980,44 @@ public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) { if (!index.isIndexed()) return new Field(name, value, store, index); - + if (!store.isStored() && random.nextBoolean()) store = Store.YES; // randomly store it - + tv = randomTVSetting(random, tv); - + return new Field(name, value, store, index, tv); } - - static final TermVector tvSettings[] = { - TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, - TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS + + static final TermVector tvSettings[] = { + TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, + TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS }; - + private static TermVector randomTVSetting(Random random, TermVector minimum) { switch(minimum) { case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)]; case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)]; - case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS + case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS : TermVector.WITH_POSITIONS_OFFSETS; - case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS + case WITH_POSITIONS: return random.nextBoolean() ? 
TermVector.WITH_POSITIONS : TermVector.WITH_POSITIONS_OFFSETS; default: return TermVector.WITH_POSITIONS_OFFSETS; } } - + /** return a random Locale from the available locales on the system */ public static Locale randomLocale(Random random) { Locale locales[] = Locale.getAvailableLocales(); return locales[random.nextInt(locales.length)]; } - + /** return a random TimeZone from the available timezones on the system */ public static TimeZone randomTimeZone(Random random) { String tzIds[] = TimeZone.getAvailableIDs(); return TimeZone.getTimeZone(tzIds[random.nextInt(tzIds.length)]); } - + /** return a Locale object equivalent to its programmatic name */ public static Locale localeForName(String localeName) { String elements[] = localeName.split("\\_"); @@ -1039,7 +1039,7 @@ "RAMDirectory", FS_DIRECTORIES[0], FS_DIRECTORIES[1], FS_DIRECTORIES[2] }; - + public static String randomDirectory(Random random) { if (random.nextInt(10) == 0) { return CORE_DIRECTORIES[random.nextInt(CORE_DIRECTORIES.length)]; @@ -1064,7 +1064,7 @@ return FSDirectory.open(file); } } - + static Directory newDirectoryImpl(Random random, String clazzName) { if (clazzName.equals("random")) clazzName = randomDirectory(random); @@ -1085,9 +1085,9 @@ return clazz.newInstance(); } catch (Exception e) { throw new RuntimeException(e); - } + } } - + /** create a new searcher over the reader. * This searcher might randomly use threads. */ public static IndexSearcher newSearcher(IndexReader r) throws IOException { @@ -1095,8 +1095,8 @@ return new IndexSearcher(r); } else { int threads = 0; - final ExecutorService ex = (random.nextBoolean()) ? null - : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), + final ExecutorService ex = (random.nextBoolean()) ? null + : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), new NamedThreadFactory("LuceneTestCase")); if (ex != null && VERBOSE) { System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads"); @@ -1121,12 +1121,12 @@ public String getName() { return this.name; } - + /** Gets a resource from the classpath as {@link File}. This method should only be used, * if a real file is needed. To get a stream, code should prefer * {@link Class#getResourceAsStream} using {@code this.getClass()}. */ - + protected File getDataFile(String name) throws IOException { try { return new File(this.getClass().getResource(name).toURI()); @@ -1137,11 +1137,11 @@ // We get here from InterceptTestCaseEvents on the 'failed' event.... public void reportAdditionalFailureInfo() { - System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + " -Dtestmethod=" + getName() + " -Dtests.seed=" + new TwoLongs(staticSeed, seed) + reproduceWithExtraParams()); } - + // extra params that were overridden needed to reproduce the command private String reproduceWithExtraParams() { StringBuilder sb = new StringBuilder(); @@ -1157,12 +1157,12 @@ private static long staticSeed; // seed for individual test methods, changed in @before private long seed; - + private static final Random seedRand = new Random(); protected static final Random random = new Random(0); private String name = ""; - + /** * Annotation for tests that should only be run during nightly builds. 
*/ @@ -1170,7 +1170,7 @@ @Inherited @Retention(RetentionPolicy.RUNTIME) public @interface Nightly {} - + /** optionally filters the tests to be run by TEST_METHOD */ public static class LuceneTestCaseRunner extends BlockJUnit4ClassRunner { private List testMethods; @@ -1200,11 +1200,11 @@ testMethods.add(new FrameworkMethod(m)); } } - + if (testMethods.isEmpty()) { throw new RuntimeException("No runnable methods!"); } - + if (TEST_NIGHTLY == false) { if (getTestClass().getJavaClass().isAnnotationPresent(Nightly.class)) { /* the test class is annotated with nightly, remove all methods */ @@ -1265,9 +1265,9 @@ @Override public boolean shouldRun(Description d) { return TEST_METHOD == null || d.getMethodName().equals(TEST_METHOD); - } + } }; - + try { f.apply(this); } catch (NoTestsRemainException e) { @@ -1275,12 +1275,12 @@ } } } - + private static class RandomCodecProvider extends CodecProvider { private List knownCodecs = new ArrayList(); private Map previousMappings = new HashMap(); private final int perFieldSeed; - + RandomCodecProvider(Random random) { this.perFieldSeed = random.nextInt(); register(new StandardCodec()); @@ -1312,13 +1312,13 @@ } return codec.name; } - + @Override public synchronized String toString() { return "RandomCodecProvider: " + previousMappings.toString(); } } - + @Ignore("just a hack") public final void alwaysIgnoredTestMethod() {} } Index: lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (revision 1097441) +++ lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (working copy) @@ -79,23 +79,23 @@ } } - /** - * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first + /** + * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first */ public static void unzip(File zipName, File destDir) throws IOException { - + ZipFile zipFile = new ZipFile(zipName); - + Enumeration entries = zipFile.entries(); - + rmDir(destDir); - + destDir.mkdir(); LuceneTestCase.tempDirs.add(destDir.getAbsolutePath()); while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); - + InputStream in = zipFile.getInputStream(entry); File targetFile = new File(destDir, entry.getName()); if (entry.isDirectory()) { @@ -105,24 +105,24 @@ if (targetFile.getParentFile()!=null) { // be on the safe side: do not rely on that directories are always extracted // before their children (although this makes sense, but is it guaranteed?) - targetFile.getParentFile().mkdirs(); + targetFile.getParentFile().mkdirs(); } OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile)); - + byte[] buffer = new byte[8192]; int len; while((len = in.read(buffer)) >= 0) { out.write(buffer, 0, len); } - + in.close(); out.close(); } } - + zipFile.close(); } - + public static void syncConcurrentMerges(IndexWriter writer) { syncConcurrentMerges(writer.getConfig().getMergeScheduler()); } @@ -138,7 +138,7 @@ public static CheckIndex.Status checkIndex(Directory dir) throws IOException { return checkIndex(dir, CodecProvider.getDefault()); } - + /** This runs the CheckIndex tool on the index in. If any * issues are hit, a RuntimeException is thrown; else, * true is returned. 
*/ @@ -245,7 +245,7 @@ 0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030, 0x1F100, 0x1F200, 0x20000, 0x2A700, 0x2F800, 0xE0000, 0xE0100, 0xF0000, 0x100000 }; - + private static final int[] blockEnds = { 0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF, 0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F, @@ -271,12 +271,12 @@ 0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F, 0x1F1FF, 0x1F2FF, 0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF }; - + /** Returns random string, all codepoints within the same unicode block. */ public static String randomRealisticUnicodeString(Random r) { return randomRealisticUnicodeString(r, 20); } - + /** Returns random string, all codepoints within the same unicode block. */ public static String randomRealisticUnicodeString(Random r, int maxLength) { final int end = r.nextInt(maxLength);
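[Usage note, not part of the patch] randomRealisticUnicodeString above keeps every code point of a generated string inside a single Unicode block. A hedged sketch of how a test might feed such strings to an IndexWriter; the field name, loop bound and already-open writer are assumptions for illustration.

    // Sketch only: index a few single-block unicode terms using the test helpers above.
    for (int i = 0; i < 5; i++) {
      String value = _TestUtil.randomRealisticUnicodeString(random, 20); // bounded length, one Unicode block
      Document doc = new Document();
      doc.add(newField(random, "content", value, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
      writer.addDocument(doc);
    }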