Index: lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java
(working copy) @@ -67,7 +67,7 @@ // ignore deletions CachingSpanFilter filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.IGNORE); - + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); ConstantScoreQuery constantScore = new ConstantScoreQuery(filter); @@ -97,7 +97,7 @@ reader = refreshReader(reader); searcher.close(); searcher = newSearcher(reader); - + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); Index: lucene/src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDoc.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -111,7 +111,7 @@ public void testIndexAndMerge() throws Exception { StringWriter sw = new StringWriter(); PrintWriter out = new PrintWriter(sw, true); - + Directory directory = newFSDirectory(indexDir); IndexWriter writer = new IndexWriter( directory, @@ -136,7 +136,7 @@ SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false); printSegment(out, siMerge3); - + directory.close(); out.close(); sw.close(); @@ -170,7 +170,7 @@ siMerge3 = merge(siMerge, siMerge2, "merge3", true); printSegment(out, siMerge3); - + directory.close(); out.close(); sw.close(); @@ -207,11 +207,11 @@ final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, false, fieldInfos.hasProx(), merger.getSegmentCodecs(), fieldInfos.hasVectors(), fieldInfos); - + if (useCompoundFile) { Collection filesToDelete = merger.createCompoundFile(merged + ".cfs", info); info.setUseCompoundFile(true); - for (final String fileToDelete : filesToDelete) + for (final String fileToDelete : filesToDelete) si1.dir.deleteFile(fileToDelete); } Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (working copy) @@ -34,7 +34,7 @@ * implemented properly */ public class TestIndexReaderClone extends LuceneTestCase { - + public void testCloneReadOnlySegmentReader() throws Exception { final Directory dir1 = newDirectory(); @@ -67,7 +67,7 @@ r2.close(); dir1.close(); } - + // open non-readOnly reader1, clone to non-readOnly // reader2, make sure we can change reader1 public void testCloneWriteToOrig() throws Exception { @@ -83,7 +83,7 @@ r2.close(); dir1.close(); } - + // open non-readOnly reader1, clone to non-readOnly // reader2, make sure we can change reader2 public void testCloneWriteToClone() throws Exception { @@ -105,7 +105,7 @@ dir1.close(); } - + // create single-segment index, open non-readOnly // SegmentReader, add docs, reopen to multireader, then do // delete @@ -116,7 +116,7 @@ IndexReader reader1 = IndexReader.open(dir1, false); TestIndexReaderReopen.modifyIndex(5, dir1); - + IndexReader reader2 = reader1.reopen(); assertTrue(reader1 != reader2); @@ -208,7 +208,7 @@ reader2.close(); dir1.close(); } - + private static boolean deleteWorked(int doc, IndexReader r) { boolean exception = false; try { @@ -219,7 +219,7 @@ } return !exception; } - + public void testCloneReadOnlyDirectoryReader() throws Exception { final Directory dir1 = newDirectory(); @@ -268,7 +268,7 @@ * are not the same on each reader 
5. Verify the doc deleted is only in the * cloned reader 6. Try to delete a document in the original reader, an * exception should be thrown - * + * * @param r1 IndexReader to perform tests on * @throws Exception */ @@ -323,7 +323,7 @@ // need to test norms? dir1.close(); } - + public void testSegmentReaderCloseReferencing() throws Exception { final Directory dir1 = newDirectory(); TestIndexReaderReopen.createIndex(random, dir1, false); @@ -343,7 +343,7 @@ clonedSegmentReader.close(); dir1.close(); } - + public void testSegmentReaderDelDocsReferenceCounting() throws Exception { final Directory dir1 = newDirectory(); TestIndexReaderReopen.createIndex(random, dir1, false); @@ -454,16 +454,16 @@ private void assertDelDocsRefCountEquals(int refCount, SegmentReader reader) { assertEquals(refCount, reader.deletedDocsRef.get()); } - + public void testCloneSubreaders() throws Exception { final Directory dir1 = newDirectory(); - + TestIndexReaderReopen.createIndex(random, dir1, true); IndexReader reader = IndexReader.open(dir1, false); reader.deleteDocument(1); // acquire write lock IndexReader[] subs = reader.getSequentialSubReaders(); assert subs.length > 1; - + IndexReader[] clones = new IndexReader[subs.length]; for (int x=0; x < subs.length; x++) { clones[x] = (IndexReader) subs[x].clone(); @@ -483,9 +483,9 @@ IndexReader r2 = r1.clone(false); r1.deleteDocument(5); r1.decRef(); - + r1.incRef(); - + r2.close(); r1.decRef(); r1.close(); Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReader.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -60,13 +60,13 @@ public class TestIndexReader extends LuceneTestCase { - + public void testCommitUserData() throws Exception { Directory d = newDirectory(); Map commitUserData = new HashMap(); commitUserData.put("foo", "fighters"); - + // set up writer IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) @@ -74,12 +74,12 @@ for(int i=0;i<27;i++) addDocumentWithFields(writer); writer.close(); - + IndexReader r = IndexReader.open(d, false); r.deleteDocument(5); r.flush(commitUserData); r.close(); - + SegmentInfos sis = new SegmentInfos(); sis.read(d); IndexReader r2 = IndexReader.open(d, false); @@ -115,10 +115,10 @@ r3.close(); d.close(); } - + public void testIsCurrent() throws Exception { Directory d = newDirectory(); - IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDocumentWithFields(writer); writer.close(); @@ -205,7 +205,7 @@ doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); } - + writer.close(); // verify fields again reader = IndexReader.open(d, false); @@ -224,10 +224,10 @@ assertTrue(fieldNames.contains("tvposition")); assertTrue(fieldNames.contains("tvoffset")); assertTrue(fieldNames.contains("tvpositionoffset")); - + // verify that only indexed fields were returned fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED); - assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields + assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields assertTrue(fieldNames.contains("keyword")); 
assertTrue(fieldNames.contains("text")); assertTrue(fieldNames.contains("unstored")); @@ -239,26 +239,26 @@ assertTrue(fieldNames.contains("tvposition")); assertTrue(fieldNames.contains("tvoffset")); assertTrue(fieldNames.contains("tvpositionoffset")); - + // verify that only unindexed fields were returned fieldNames = reader.getFieldNames(IndexReader.FieldOption.UNINDEXED); assertEquals(2, fieldNames.size()); // the following fields assertTrue(fieldNames.contains("unindexed")); assertTrue(fieldNames.contains("unindexed2")); - - // verify index term vector fields + + // verify index term vector fields fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR); assertEquals(1, fieldNames.size()); // 1 field has term vector only assertTrue(fieldNames.contains("termvector")); - + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION); assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors assertTrue(fieldNames.contains("tvposition")); - + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET); assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors assertTrue(fieldNames.contains("tvoffset")); - + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET); assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors assertTrue(fieldNames.contains("tvpositionoffset")); @@ -366,13 +366,13 @@ reader2.close(); dir.close(); } - + public void testBinaryFields() throws IOException { Directory dir = newDirectory(); byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); - + for (int i = 0; i < 10; i++) { addDoc(writer, "document number " + (i + 1)); addDocumentWithFields(writer); @@ -589,14 +589,14 @@ reader = IndexReader.open(dir, false); reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); reader.close(); - + // now open reader again & set norm for doc 0 (writes to _0_2.s0) reader = IndexReader.open(dir, false); reader.setNorm(0, "content", sim.encodeNormValue(2.0f)); reader.close(); assertFalse("failed to remove first generation norms file on writing second generation", dir.fileExists("_0_1.s0")); - + dir.close(); } @@ -619,7 +619,7 @@ } rmDir(fileDirName); }*/ - + public void testDeleteReaderWriterConflictOptimized() throws IOException{ deleteReaderWriterConflict(true); } @@ -802,7 +802,7 @@ // expected exception } try { - IndexWriter.unlock(dir); // this should not be done in the real world! + IndexWriter.unlock(dir); // this should not be done in the real world! 
} catch (LockReleaseFailedException lrfe) { writer.close(); } @@ -866,7 +866,7 @@ public void testDeleteReaderReaderConflictUnoptimized() throws IOException{ deleteReaderReaderConflict(false); } - + public void testDeleteReaderReaderConflictOptimized() throws IOException{ deleteReaderReaderConflict(true); } @@ -880,7 +880,7 @@ Term searchTerm = new Term("content", "aaa"); int START_COUNT = 157; int END_COUNT = 144; - + // First build up a starting index: MockDirectoryWrapper startDir = newDirectory(); IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); @@ -1066,7 +1066,7 @@ } public void testDocsOutOfOrderJIRA140() throws IOException { - Directory dir = newDirectory(); + Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); for(int i=0;i<11;i++) { addDoc(writer, "aaa"); @@ -1106,7 +1106,7 @@ public void testExceptionReleaseWriteLockJIRA768() throws IOException { - Directory dir = newDirectory(); + Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); addDoc(writer, "aaa"); writer.close(); @@ -1157,7 +1157,7 @@ } catch (FileNotFoundException e) { // expected } - + dir.close(); } @@ -1315,10 +1315,10 @@ doc.add(newField("tvoffset","tvoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); doc.add(newField("tvposition","tvposition", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - + writer.addDocument(doc); } - + private void addDoc(IndexWriter writer, String value) throws IOException { Document doc = new Document(); doc.add(newField("content", value, Field.Store.NO, Field.Index.ANALYZED)); @@ -1330,7 +1330,7 @@ assertEquals("IndexReaders have different values for maxDoc.", index1.maxDoc(), index2.maxDoc()); assertEquals("Only one IndexReader has deletions.", index1.hasDeletions(), index2.hasDeletions()); assertEquals("Only one index is optimized.", index1.isOptimized(), index2.isOptimized()); - + // check field names Collection fields1 = index1.getFieldNames(FieldOption.ALL); Collection fields2 = index1.getFieldNames(FieldOption.ALL); @@ -1340,7 +1340,7 @@ while (it1.hasNext()) { assertEquals("Different field names.", it1.next(), it2.next()); } - + // check norms it1 = fields1.iterator(); while (it1.hasNext()) { @@ -1359,7 +1359,7 @@ assertSame(norms1, norms2); } } - + // check deletions final Bits delDocs1 = MultiFields.getDeletedDocs(index1); final Bits delDocs2 = MultiFields.getDeletedDocs(index2); @@ -1368,7 +1368,7 @@ delDocs1 == null || delDocs1.get(i), delDocs2 == null || delDocs2.get(i)); } - + // check stored fields for (int i = 0; i < index1.maxDoc(); i++) { if (delDocs1 == null || !delDocs1.get(i)) { @@ -1384,10 +1384,10 @@ Field curField2 = (Field) itField2.next(); assertEquals("Different fields names for doc " + i + ".", curField1.name(), curField2.name()); assertEquals("Different field values for doc " + i + ".", curField1.stringValue(), curField2.stringValue()); - } + } } } - + // check dictionary and posting lists FieldsEnum fenum1 = MultiFields.getFields(index1).iterator(); FieldsEnum fenum2 = MultiFields.getFields(index1).iterator(); @@ -1467,7 +1467,7 @@ r.close(); r2.close(); d.close(); - } + } public void testReadOnly() throws Throwable { 
Directory d = newDirectory(); @@ -1518,7 +1518,7 @@ IndexReader r3 = r2.reopen(); assertFalse(r3 == r2); r2.close(); - + assertFalse(r == r2); try { @@ -1602,7 +1602,7 @@ public void testNoDupCommitFileNames() throws Throwable { Directory dir = newDirectory(); - + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2)); @@ -1610,12 +1610,12 @@ writer.addDocument(createDocument("a")); writer.addDocument(createDocument("a")); writer.close(); - + Collection commits = IndexReader.listCommits(dir); for (final IndexCommit commit : commits) { Collection files = commit.getFileNames(); HashSet seen = new HashSet(); - for (final String fileName : files) { + for (final String fileName : files) { assertTrue("file " + fileName + " was duplicated", !seen.contains(fileName)); seen.add(fileName); } @@ -1820,7 +1820,7 @@ // LUCENE-2046 public void testPrepareCommitIsCurrent() throws Throwable { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.commit(); Document doc = new Document(); @@ -1838,12 +1838,12 @@ r.close(); dir.close(); } - + // LUCENE-2753 public void testListCommits() throws Exception { Directory dir = newDirectory(); SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, null).setIndexDeletionPolicy(sdp)); writer.addDocument(new Document()); writer.commit(); Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy) @@ -201,7 +201,7 @@ Map docs = new HashMap(); IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE) - .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates) + .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates)) .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy())); w.setInfoStream(VERBOSE ? 
System.out : null); LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); Index: lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (working copy) @@ -121,7 +121,7 @@ fieldInfos = new FieldInfos(dir, IndexFileNames.segmentFileName(seg, "", IndexFileNames.FIELD_INFOS_EXTENSION)); } - + @Override public void tearDown() throws Exception { dir.close(); @@ -130,17 +130,17 @@ private class MyTokenStream extends TokenStream { int tokenUpto; - + CharTermAttribute termAtt; PositionIncrementAttribute posIncrAtt; OffsetAttribute offsetAtt; - + public MyTokenStream() { termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); } - + @Override public boolean incrementToken() { if (tokenUpto >= tokens.length) Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -148,8 +148,8 @@ writer.addDocument(doc); } - + public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { String[] startFiles = dir.listAll(); SegmentInfos infos = new SegmentInfos(); @@ -262,7 +262,7 @@ if (VERBOSE) { System.out.println("TEST: config1=" + writer.getConfig()); } - + for(int j=0;j<500;j++) { addDocWithIndex(writer, j); } @@ -338,7 +338,7 @@ assertEquals("should be one document", reader2.numDocs(), 1); reader.close(); reader2.close(); - + dir.close(); } @@ -367,14 +367,14 @@ * these docs until writer is closed. */ public void testCommitOnClose() throws IOException { - Directory dir = newDirectory(); + Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < 14; i++) { addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("first number of hits", 14, hits.length); @@ -415,14 +415,14 @@ * and add docs to it. 
*/ public void testCommitOnCloseAbort() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); for (int i = 0; i < 14; i++) { addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("first number of hits", 14, hits.length); @@ -450,7 +450,7 @@ hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("saw changes after writer.abort", 14, hits.length); searcher.close(); - + // Now make sure we can re-open the index, add docs, // and all is good: writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) @@ -567,7 +567,7 @@ * and close(). */ public void testCommitOnCloseOptimize() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); // Must disable throwing exc on double-write: this // test uses IW.rollback which easily results in // writing to same file more than once @@ -634,7 +634,7 @@ } public void testIndexNoDocuments() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.commit(); writer.close(); @@ -656,7 +656,7 @@ } public void testManyFields() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); for(int j=0;j<100;j++) { Document doc = new Document(); @@ -686,7 +686,7 @@ } public void testSmallRAMBuffer() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
@@ -782,13 +782,14 @@ writer.deleteDocuments(new Term("field", "aaa" + j)); _TestUtil.syncConcurrentMerges(writer); int flushCount = writer.getFlushCount(); + if (j == 1) lastFlushCount = flushCount; else if (j < 10) { // No new files should be created assertEquals(flushCount, lastFlushCount); } else if (10 == j) { - assertTrue(flushCount > lastFlushCount); + assertTrue("" + j, flushCount > lastFlushCount); lastFlushCount = flushCount; writer.getConfig().setRAMBufferSizeMB(0.000001); writer.getConfig().setMaxBufferedDeleteTerms(1); @@ -825,7 +826,7 @@ } public void testDiverseDocs() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5)); for(int i=0;i<3;i++) { // First, docs where every term is unique (heavy on @@ -872,12 +873,12 @@ } public void testEnablingNorms() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); // Enable norms for only 1 doc, pre flush for(int j=0;j<10;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); if (j != 8) { f.setOmitNorms(true); } @@ -898,7 +899,7 @@ // Enable norms for only 1 doc, post flush for(int j=0;j<27;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); if (j != 26) { f.setOmitNorms(true); } @@ -918,7 +919,7 @@ } public void testHighFreqTerm() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01)); // Massive doc that has 128 K a's @@ -968,7 +969,7 @@ return myLockFactory.makeLock(name); } } - + Directory dir = new MyRAMDirectory(new RAMDirectory()); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); @@ -976,7 +977,7 @@ addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("did not get right number of hits", 100, hits.length); @@ -1073,7 +1074,7 @@ infos.read(dir); assertEquals(2, infos.size()); } - } + } dir.close(); } @@ -1089,7 +1090,7 @@ Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); - + Document document = new Document(); document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES)); iw.addDocument(document); @@ -1343,7 +1344,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1370,12 +1371,12 @@ writer.close(); dir.close(); } - + // LUCENE-325: test expungeDeletes, when 2 singular merges // are required public void testExpungeDeletes() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, 
newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setRAMBufferSizeMB( IndexWriterConfig.DISABLE_AUTO_FLUSH)); @@ -1537,14 +1538,14 @@ public void doAfterFlush() { afterWasCalled = true; } - + @Override protected void doBeforeFlush() throws IOException { beforeWasCalled = true; } } - + // LUCENE-1222 public void testDoBeforeAfterFlush() throws IOException { Directory dir = newDirectory(); @@ -1572,7 +1573,7 @@ } - + final String[] utf8Data = new String[] { // unpaired low surrogate "ab\udc17cd", "ab\ufffdcd", @@ -1642,7 +1643,7 @@ } UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8); - + String s1 = new String(chars, 0, len); String s2 = new String(utf8.bytes, 0, utf8.length, "UTF-8"); assertEquals("codepoint " + ch, s1, s2); @@ -1699,7 +1700,7 @@ expected[i++] = 0xfffd; expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); hasIllegal = true; - } else + } else expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); } else { expected[i] = buffer[i] = ' '; @@ -1796,10 +1797,10 @@ final TokenStream tokens = new TokenStream() { final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - + final Iterator terms = Arrays.asList("a","b","c").iterator(); boolean first = true; - + @Override public boolean incrementToken() { if (!terms.hasNext()) return false; @@ -1856,7 +1857,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1912,7 +1913,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1979,7 +1980,7 @@ byte[] b = new byte[50]; for(int i=0;i<50;i++) b[i] = (byte) (i+77); - + Document doc = new Document(); Field f = new Field("binary", b, 10, 17); byte[] bx = f.getBinaryValue(); @@ -2016,7 +2017,7 @@ // commit(Map) never called for this index assertEquals(0, r.getCommitUserData().size()); r.close(); - + w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); for(int j=0;j<17;j++) addDoc(w); @@ -2024,7 +2025,7 @@ data.put("label", "test1"); w.commit(data); w.close(); - + assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); r = IndexReader.open(dir, true); @@ -2036,7 +2037,7 @@ w.close(); assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); - + dir.close(); } @@ -2046,7 +2047,7 @@ Directory dir = newDirectory(); MockAnalyzer analyzer = new MockAnalyzer(random); analyzer.setPositionIncrementGap( 100 ); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); Field f = newField("field", "", Field.Store.NO, @@ -2073,7 +2074,7 @@ // LUCENE-1468 -- make sure opening an IndexWriter with // create=true does not remove non-index files - + public void testOtherFiles() throws Throwable { Directory dir = newDirectory(); try { @@ -2132,7 +2133,7 @@ @Override public void run() { // LUCENE-2239: won't work with NIOFS/MMAP - Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); + Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); IndexWriter w = null; while(!finish) { try { @@ -2141,7 +2142,7 @@ if (w != null) { w.close(); } - IndexWriterConfig conf = newIndexWriterConfig( + IndexWriterConfig conf = 
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2); w = new IndexWriter(dir, conf); @@ -2208,10 +2209,10 @@ e.printStackTrace(System.out); } } - try { + try { dir.close(); - } catch (IOException e) { - throw new RuntimeException(e); + } catch (IOException e) { + throw new RuntimeException(e); } } } @@ -2226,7 +2227,7 @@ // interrupt arrives while class loader is trying to // init this class (in servicing a first interrupt): assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException); - + // issue 100 interrupts to child thread int i = 0; while(i < 100) { @@ -2260,12 +2261,12 @@ doc.add(f); doc.add(f2); w.addDocument(doc); - + // add 2 docs to test in-memory merging f.setTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false)); f2.setTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false)); w.addDocument(doc); - + // force segment flush so we can force a segment merge with doc3 later. w.commit(); @@ -2288,7 +2289,7 @@ assertTrue(ir.document(0).getFieldable("binary").isBinary()); assertTrue(ir.document(1).getFieldable("binary").isBinary()); assertTrue(ir.document(2).getFieldable("binary").isBinary()); - + assertEquals("value", ir.document(0).get("string")); assertEquals("value", ir.document(1).get("string")); assertEquals("value", ir.document(2).get("string")); @@ -2359,7 +2360,7 @@ public void testNoDocsIndex() throws Throwable { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); writer.setInfoStream(new PrintStream(bos)); @@ -2369,7 +2370,7 @@ _TestUtil.checkIndex(dir); dir.close(); } - + // LUCENE-2095: make sure with multiple threads commit // doesn't return until all changes are in fact in the // index @@ -2377,7 +2378,7 @@ final int NUM_THREADS = 5; final double RUN_SEC = 0.5; final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( + final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); _TestUtil.reduceOpenFiles(w.w); w.commit(); @@ -2562,7 +2563,7 @@ Field f = newField("field", s.toString(), Field.Store.NO, Field.Index.ANALYZED); d.add(f); w.addDocument(d); - + IndexReader r = w.getReader().getSequentialSubReaders()[0]; TermsEnum t = r.fields().terms("field").iterator(); int count = 0; @@ -2648,10 +2649,10 @@ // in case a deletion policy which holds onto commits is used. Directory dir = newDirectory(); SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setIndexDeletionPolicy(sdp)); - + // First commit Document doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); @@ -2661,7 +2662,7 @@ // Keep that commit sdp.snapshot("id"); - + // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. 
doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); @@ -2673,11 +2674,11 @@ sdp.release("id"); writer.deleteUnusedFiles(); assertEquals(1, IndexReader.listCommits(dir).size()); - + writer.close(); dir.close(); } - + private static class FlushCountingIndexWriter extends IndexWriter { int flushCount; public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException { @@ -2689,7 +2690,7 @@ } } - public void testIndexingThenDeleting() throws Exception { + public void _testIndexingThenDeleting() throws Exception { final Random r = random; Directory dir = newDirectory(); @@ -2732,7 +2733,7 @@ w.close(); dir.close(); } - + public void testNoCommits() throws Exception { // Tests that if we don't call commit(), the directory has 0 commits. This has // changed since LUCENE-2386, where before IW would always commit on a fresh @@ -2753,7 +2754,7 @@ public void testEmptyFSDirWithNoLock() throws Exception { // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF), - // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed + // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed // when listAll() was called in IndexFileDeleter. Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory()); new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); @@ -2762,10 +2763,10 @@ public void testEmptyDirRollback() throws Exception { // Tests that if IW is created over an empty Directory, some documents are - // indexed, flushed (but not committed) and then IW rolls back, then no + // indexed, flushed (but not committed) and then IW rolls back, then no // files are left in the Directory. Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); String[] files = dir.listAll(); @@ -2789,7 +2790,7 @@ writer.addDocument(doc); // Adding just one document does not call flush yet. 
assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length); - + doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); @@ -2810,17 +2811,17 @@ public void testNoSegmentFile() throws IOException { Directory dir = newDirectory(); dir.setLockFactory(NoLockFactory.getNoLockFactory()); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); - + Document doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); w.addDocument(doc); w.addDocument(doc); - IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2) .setOpenMode(OpenMode.CREATE)); - + w2.close(); // If we don't do that, the test fails on Windows w.rollback(); @@ -2859,7 +2860,7 @@ w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).setIndexCommit(commit)); assertEquals(1, w.numDocs()); - + // commit IndexWriter to "third" w.addDocument(doc); commitData.put("tag", "third"); @@ -2914,7 +2915,7 @@ } final int docCount = 200*RANDOM_MULTIPLIER; final int fieldCount = _TestUtil.nextInt(rand, 1, 5); - + final List fieldIDs = new ArrayList(); Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); @@ -2924,7 +2925,7 @@ } final Map docs = new HashMap(); - + if (VERBOSE) { System.out.println("TEST: build index docCount=" + docCount); } @@ -3111,7 +3112,7 @@ Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random, dir, new StringSplitAnalyzer()); - char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8]; + char[] chars = new char[DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8]; Arrays.fill(chars, 'x'); Document doc = new Document(); final String bigTerm = new String(chars); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (working copy) @@ -223,8 +223,9 @@ threads[i].join(); for(int i=0;i dif = difFiles(files, files2); - + if (!Arrays.equals(files, files2)) { fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.length-files.length) + " files but only deleted " + (filesPre.length - files2.length) + "; expected files:\n " + asString(files) + "\n actual files:\n " + asString(files2)+"\ndif: "+dif); } @@ -172,7 +172,7 @@ Set set1 = new HashSet(); Set set2 = new HashSet(); Set extra = new HashSet(); - + for (int x=0; x < files1.length; x++) { set1.add(files1[x]); } @@ -195,7 +195,7 @@ } return extra; } - + private String asString(String[] l) { String s = ""; for(int i=0;i getters = new HashSet(); getters.add("getAnalyzer"); @@ -91,7 +96,11 @@ getters.add("getMergePolicy"); getters.add("getMaxThreadStates"); getters.add("getReaderPooling"); + getters.add("getIndexerThreadPool"); getters.add("getReaderTermsIndexDivisor"); + getters.add("getFlushPolicy"); + getters.add("getRAMPerThreadHardLimitMB"); + for (Method m : 
IndexWriterConfig.class.getDeclaredMethods()) { if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("get")) { assertTrue("method " + m.getName() + " is not tested for defaults", getters.contains(m.getName())); @@ -107,12 +116,12 @@ if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("set") && !Modifier.isStatic(m.getModifiers())) { - assertEquals("method " + m.getName() + " does not return IndexWriterConfig", + assertEquals("method " + m.getName() + " does not return IndexWriterConfig", IndexWriterConfig.class, m.getReturnType()); } } } - + @Test public void testConstants() throws Exception { // Tests that the values of the constants does not change @@ -126,7 +135,7 @@ assertEquals(8, IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); assertEquals(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR); } - + @Test public void testToString() throws Exception { String str = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).toString(); @@ -143,15 +152,15 @@ assertTrue(f.getName() + " not found in toString", str.indexOf(f.getName()) != -1); } } - + @Test public void testClone() throws Exception { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); IndexWriterConfig clone = (IndexWriterConfig) conf.clone(); - + // Clone is shallow since not all parameters are cloneable. assertTrue(conf.getIndexDeletionPolicy() == clone.getIndexDeletionPolicy()); - + conf.setMergeScheduler(new SerialMergeScheduler()); assertEquals(ConcurrentMergeScheduler.class, clone.getMergeScheduler().getClass()); } @@ -159,14 +168,14 @@ @Test public void testInvalidValues() throws Exception { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); - + // Test IndexDeletionPolicy assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(null)); assertEquals(SnapshotDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); conf.setIndexDeletionPolicy(null); assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); - + // Test MergeScheduler assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); conf.setMergeScheduler(new SerialMergeScheduler()); @@ -183,12 +192,12 @@ assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider()); // Test IndexingChain - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); conf.setIndexingChain(new MyIndexingChain()); assertEquals(MyIndexingChain.class, conf.getIndexingChain().getClass()); conf.setIndexingChain(null); - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); - + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); + try { conf.setMaxBufferedDeleteTerms(0); fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); @@ -239,12 +248,26 @@ // this is expected } + try { + conf.setRAMPerThreadHardLimitMB(2048); + fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048"); + } catch (IllegalArgumentException e) { + // this is expected + } + + try { + conf.setRAMPerThreadHardLimitMB(0); + fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0"); + } catch (IllegalArgumentException e) { + // this is 
expected + } + assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); - conf.setMaxThreadStates(5); + conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(5)); assertEquals(5, conf.getMaxThreadStates()); - conf.setMaxThreadStates(0); + conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(0)); assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); - + // Test MergePolicy assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); conf.setMergePolicy(new LogDocMergePolicy()); Index: lucene/src/test/org/apache/lucene/index/TestCodecs.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestCodecs.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestCodecs.java (working copy) @@ -381,7 +381,7 @@ this.register(new MockSepCodec()); this.setDefaultFieldCodec("MockSep"); } - + } private class Verify extends Thread { Index: lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (working copy) @@ -50,7 +50,7 @@ boolean isClose = false; StackTraceElement[] trace = new Exception().getStackTrace(); for (int i = 0; i < trace.length; i++) { - if ("doFlush".equals(trace[i].getMethodName())) { + if ("flush".equals(trace[i].getMethodName())) { isDoFlush = true; } if ("close".equals(trace[i].getMethodName())) { Index: lucene/src/test/org/apache/lucene/index/TestByteSlices.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestByteSlices.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestByteSlices.java (working copy) @@ -39,7 +39,7 @@ starts[stream] = -1; counters[stream] = 0; } - + int num = 10000 * RANDOM_MULTIPLIER; for (int iter = 0; iter < num; iter++) { int stream = random.nextInt(NUM_STREAM); @@ -67,7 +67,7 @@ if (VERBOSE) System.out.println(" addr now " + uptos[stream]); } - + for(int stream=0;stream stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); assertTrue(stored != null); //System.out.println("stored size: " + stored.size()); assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3); - + TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); assertTrue(vector != null); BytesRef [] terms = vector.getTerms(); @@ -116,7 +115,7 @@ assertTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); assertTrue(vector instanceof TermPositionVector == true); - + for (int i = 0; i < terms.length; i++) { String term = terms[i].utf8ToString(); int freq = freqs[i]; @@ -127,5 +126,5 @@ TestSegmentReader.checkNorms(mergedReader); mergedReader.close(); - } + } } Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (working copy) @@ -42,7 +42,7 @@ import org.apache.lucene.util._TestUtil; public class TestAddIndexes extends LuceneTestCase { - + public void testSimpleCase() throws IOException { // main directory Directory dir = newDirectory(); @@ 
-204,9 +204,9 @@ doc.add(newField("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED)); writer.updateDocument(new Term("id", "" + (i%10)), doc); } - + writer.addIndexes(aux); - + // Deletes one of the 10 added docs, leaving 9: PhraseQuery q = new PhraseQuery(); q.add(new Term("content", "bbb")); @@ -619,7 +619,7 @@ doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); } - + private abstract class RunAddIndexesThreads { Directory dir, dir2; @@ -646,8 +646,8 @@ writer2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer2.setInfoStream(VERBOSE ? System.out : null); writer2.commit(); - + readers = new IndexReader[NUM_COPY]; for(int i=0;i dataset = asSet(data); - + private static String MAGIC_FIELD = "f"+(NUM_FIELDS/3); - + private static FieldSelector SELECTOR = new FieldSelector() { public FieldSelectorResult accept(String f) { if (f.equals(MAGIC_FIELD)) { @@ -58,22 +58,21 @@ return FieldSelectorResult.LAZY_LOAD; } }; - - private Directory makeIndex() throws Exception { + + private Directory makeIndex() throws Exception { Directory dir = newDirectory(); try { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setUseCompoundFile(false); - for (int d = 1; d <= NUM_DOCS; d++) { Document doc = new Document(); for (int f = 1; f <= NUM_FIELDS; f++ ) { - doc.add(newField("f"+f, - data[f % data.length] - + '#' + data[random.nextInt(data.length)], - Field.Store.YES, + doc.add(newField("f"+f, + data[f % data.length] + + '#' + data[random.nextInt(data.length)], + Field.Store.YES, Field.Index.ANALYZED)); } writer.addDocument(doc); @@ -84,14 +83,14 @@ } return dir; } - + public void doTest(int[] docs) throws Exception { Directory dir = makeIndex(); IndexReader reader = IndexReader.open(dir, true); for (int i = 0; i < docs.length; i++) { Document d = reader.document(docs[i], SELECTOR); d.get(MAGIC_FIELD); - + List fields = d.getFields(); for (Iterator fi = fields.iterator(); fi.hasNext(); ) { Fieldable f=null; @@ -101,7 +100,7 @@ String fval = f.stringValue(); assertNotNull(docs[i]+" FIELD: "+fname, fval); String[] vals = fval.split("#"); - if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { + if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { fail("FIELD:"+fname+",VAL:"+fval); } } catch (Exception e) { @@ -116,7 +115,7 @@ public void testLazyWorks() throws Exception { doTest(new int[] { 399 }); } - + public void testLazyAlsoWorks() throws Exception { doTest(new int[] { 399, 150 }); } Index: lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (working copy) @@ -40,7 +40,7 @@ */ public class TestDeletionPolicy extends LuceneTestCase { - + private void verifyCommitOrder(List commits) throws IOException { final IndexCommit firstCommit = commits.get(0); long last = SegmentInfos.generationFromSegmentsFileName(firstCommit.getSegmentsFileName()); @@ -135,7 +135,7 @@ verifyCommitOrder(commits); doDeletes(commits, true); } - + private void doDeletes(List commits, boolean isCommit) { // Assert that we really are only called for each new @@ -248,7 +248,7 @@ // seconds of the last 
one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.getCurrentSegmentGeneration(dir); - + String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); @@ -276,7 +276,7 @@ // OK break; } - + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } @@ -449,7 +449,7 @@ // Now 8 because we made another commit assertEquals(7, IndexReader.listCommits(dir).size()); - + r = IndexReader.open(dir, true); // Not optimized because we rolled it back, and now only // 10 docs @@ -471,7 +471,7 @@ // but this time keeping only the last commit: writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexCommit(lastCommit)); assertEquals(10, writer.numDocs()); - + // Reader still sees optimized index, because writer // opened on the prior commit has not yet committed: r = IndexReader.open(dir, true); @@ -626,7 +626,7 @@ } IndexWriter writer = new IndexWriter(dir, conf); writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for(int i=0;i 0); assertTrue("seeksCounter=" + this.seeksCounter + " numHits=" + numHits, this.seeksCounter <= numHits + 1); } - + public void testLazySkipping() throws IOException { assumeFalse("This test cannot run with SimpleText codec", CodecProvider.getDefault().getFieldCodec(this.field).equals("SimpleText")); // test whether only the minimum amount of seeks() @@ -140,7 +140,7 @@ performTest(10); searcher.close(); } - + public void testSeek() throws IOException { Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); @@ -149,7 +149,7 @@ doc.add(newField(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } - + writer.close(); IndexReader reader = IndexReader.open(directory, true); @@ -176,55 +176,55 @@ } reader.close(); directory.close(); - + } - + // Simply extends IndexInput in a way that we are able to count the number // of invocations of seek() class SeeksCountingStream extends IndexInput { - private IndexInput input; - - + private IndexInput input; + + SeeksCountingStream(IndexInput input) { this.input = input; - } - + } + @Override public byte readByte() throws IOException { return this.input.readByte(); } - + @Override public void readBytes(byte[] b, int offset, int len) throws IOException { - this.input.readBytes(b, offset, len); + this.input.readBytes(b, offset, len); } - + @Override public void close() throws IOException { this.input.close(); } - + @Override public long getFilePointer() { return this.input.getFilePointer(); } - + @Override public void seek(long pos) throws IOException { TestLazyProxSkipping.this.seeksCounter++; this.input.seek(pos); } - + @Override public long length() { return this.input.length(); } - + @Override public Object clone() { return new SeeksCountingStream((IndexInput) this.input.clone()); } - + } } Index: lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java =================================================================== --- lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java (revision 1097270) +++ lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java (working copy) @@ -38,12 +38,12 @@ Set fileExtensions = new HashSet(); fileExtensions.add(IndexFileNames.FIELDS_EXTENSION); 
fileExtensions.add(IndexFileNames.FIELDS_INDEX_EXTENSION); - + MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); primaryDir.setCheckIndexOnClose(false); // only part of an index MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); secondaryDir.setCheckIndexOnClose(false); // only part of an index - + FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true); IndexWriter writer = new IndexWriter( fsd, Index: lucene/src/java/org/apache/lucene/index/DocInverterPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (working copy) @@ -35,20 +35,20 @@ final class DocInverterPerField extends DocFieldConsumerPerField { - final private DocInverterPerThread perThread; - final private FieldInfo fieldInfo; + final private DocInverter parent; + final FieldInfo fieldInfo; final InvertedDocConsumerPerField consumer; final InvertedDocEndConsumerPerField endConsumer; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; - public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) { - this.perThread = perThread; + public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) { + this.parent = parent; this.fieldInfo = fieldInfo; - docState = perThread.docState; - fieldState = perThread.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); - this.endConsumer = perThread.endConsumer.addField(this, fieldInfo); + docState = parent.docState; + fieldState = parent.fieldState; + this.consumer = parent.consumer.addField(this, fieldInfo); + this.endConsumer = parent.endConsumer.addField(this, fieldInfo); } @Override @@ -80,8 +80,8 @@ if (!field.isTokenized()) { // un-tokenized field String stringValue = field.stringValue(); final int valueLength = stringValue.length(); - perThread.singleToken.reinit(stringValue, 0, valueLength); - fieldState.attributeSource = perThread.singleToken; + parent.singleToken.reinit(stringValue, 0, valueLength); + fieldState.attributeSource = parent.singleToken; consumer.start(field); boolean success = false; @@ -89,8 +89,9 @@ consumer.add(); success = true; } finally { - if (!success) + if (!success) { docState.docWriter.setAborting(); + } } fieldState.offset += valueLength; fieldState.length++; @@ -114,8 +115,8 @@ if (stringValue == null) { throw new IllegalArgumentException("field must have either TokenStream, String or Reader value"); } - perThread.stringReader.init(stringValue); - reader = perThread.stringReader; + parent.stringReader.init(stringValue); + reader = parent.stringReader; } // Tokenize field and add to postingTable @@ -166,8 +167,9 @@ consumer.add(); success = true; } finally { - if (!success) + if (!success) { docState.docWriter.setAborting(); + } } fieldState.length++; fieldState.position++; @@ -195,4 +197,9 @@ consumer.finish(); endConsumer.finish(); } + + @Override + FieldInfo getFieldInfo() { + return this.fieldInfo; + } } Index: lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (working copy) @@ -27,9 +27,8 @@ final 
class NormsWriterPerField extends InvertedDocEndConsumerPerField implements Comparable { - final NormsWriterPerThread perThread; final FieldInfo fieldInfo; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final Similarity similarity; // Holds all docID/norm pairs we've seen @@ -46,10 +45,9 @@ upto = 0; } - public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) { - this.perThread = perThread; + public NormsWriterPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; - docState = perThread.docState; + docState = docInverterPerField.docState; fieldState = docInverterPerField.fieldState; similarity = docState.similarityProvider.get(fieldInfo.name); } Index: lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -abstract class TermsHashConsumerPerThread { - abstract void startDocument() throws IOException; - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); - abstract public void abort(); -} Index: lucene/src/java/org/apache/lucene/index/FieldsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldsWriter.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/FieldsWriter.java (working copy) @@ -2,13 +2,13 @@ /** * Copyright 2004 The Apache Software Foundation - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the @@ -22,15 +22,14 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMOutputStream; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.IOUtils; final class FieldsWriter { static final byte FIELD_IS_TOKENIZED = 0x1; static final byte FIELD_IS_BINARY = 0x2; - + // Lucene 3.0: Removal of compressed fields static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; @@ -38,7 +37,7 @@ // than the current one, and always change this if you // switch to a new format! static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; - + // when removing support for old versions, leave the last supported version here static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; @@ -83,10 +82,9 @@ // and adds a new entry for this document into the index // stream. This assumes the buffer was already written // in the correct fields format. - void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException { + void startDocument(int numStoredFields) throws IOException { indexStream.writeLong(fieldsStream.getFilePointer()); fieldsStream.writeVInt(numStoredFields); - buffer.writeTo(fieldsStream); } void skipDocument() throws IOException { @@ -121,8 +119,8 @@ } } - final void writeField(FieldInfo fi, Fieldable field) throws IOException { - fieldsStream.writeVInt(fi.number); + final void writeField(int fieldNumber, Fieldable field) throws IOException { + fieldsStream.writeVInt(fieldNumber); byte bits = 0; if (field.isTokenized()) bits |= FieldsWriter.FIELD_IS_TOKENIZED; @@ -175,10 +173,9 @@ fieldsStream.writeVInt(storedCount); - for (Fieldable field : fields) { if (field.isStored()) - writeField(fieldInfos.fieldInfo(field.name()), field); + writeField(fieldInfos.fieldNumber(field.name()), field); } } } Index: lucene/src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -37,14 +37,14 @@ /** * Information about a segment such as it's name, directory, and files related * to the segment. - * + * * @lucene.experimental */ public final class SegmentInfo { static final int NO = -1; // e.g. no norms; no deletes; static final int YES = 1; // e.g. have norms; have deletes; - static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. + static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. public String name; // unique name in dir public int docCount; // number of docs in seg @@ -56,7 +56,7 @@ * - YES or higher if there are deletes at generation N */ private long delGen; - + /* * Current generation of each field's norm file. If this array is null, * means no separate norms. 
If this array is not null, its values mean: @@ -65,7 +65,7 @@ */ private Map normGen; - private boolean isCompoundFile; + private boolean isCompoundFile; private volatile List files; // cached list of files that this segment uses // in the Directory @@ -73,11 +73,11 @@ private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand) private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand) - private int docStoreOffset; // if this segment shares stored fields & vectors, this + @Deprecated private int docStoreOffset; // if this segment shares stored fields & vectors, this // offset is where in that file this segment's docs begin - private String docStoreSegment; // name used to derive fields/vectors file we share with + @Deprecated private String docStoreSegment; // name used to derive fields/vectors file we share with // other segments - private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx) + @Deprecated private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx) private int delCount; // How many deleted docs in this segment @@ -91,9 +91,9 @@ private Map diagnostics; - // Tracks the Lucene version this segment was created with, since 3.1. Null + // Tracks the Lucene version this segment was created with, since 3.1. Null // indicates an older than 3.0 index, and it's used to detect a too old index. - // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and + // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and // specific versions afterwards ("3.0", "3.1" etc.). // see Constants.LUCENE_MAIN_VERSION. private String version; @@ -101,7 +101,7 @@ // NOTE: only used in-RAM by IW to track buffered deletes; // this is never written to/read from the Directory private long bufferedDeletesGen; - + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) { this.name = name; @@ -182,11 +182,13 @@ docStoreSegment = name; docStoreIsCompoundFile = false; } + if (format > DefaultSegmentInfosWriter.FORMAT_4_0) { // pre-4.0 indexes write a byte if there is a single norms file byte b = input.readByte(); assert 1 == b; } + int numNormGen = input.readInt(); if (numNormGen == NO) { normGen = null; @@ -207,7 +209,7 @@ assert delCount <= docCount; hasProx = input.readByte() == YES; - + // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name); if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) { segmentCodecs = new SegmentCodecs(codecs, input); @@ -217,7 +219,7 @@ segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")}); } diagnostics = input.readStringStringMap(); - + if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) { hasVectors = input.readByte() == 1; } else { @@ -366,7 +368,7 @@ // against this segment return null; } else { - return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); + return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); } } @@ -432,7 +434,7 @@ if (hasSeparateNorms(number)) { return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number)); } else { - // single file for all norms + // single file for all norms return IndexFileNames.fileNameFromGeneration(name, 
IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN); } } @@ -465,39 +467,46 @@ assert delCount <= docCount; } + @Deprecated public int getDocStoreOffset() { return docStoreOffset; } - + + @Deprecated public boolean getDocStoreIsCompoundFile() { return docStoreIsCompoundFile; } - - void setDocStoreIsCompoundFile(boolean v) { - docStoreIsCompoundFile = v; + + @Deprecated + public void setDocStoreIsCompoundFile(boolean docStoreIsCompoundFile) { + this.docStoreIsCompoundFile = docStoreIsCompoundFile; clearFilesCache(); } - + + @Deprecated + void setDocStore(int offset, String segment, boolean isCompoundFile) { + docStoreOffset = offset; + docStoreSegment = segment; + docStoreIsCompoundFile = isCompoundFile; + clearFilesCache(); + } + + @Deprecated public String getDocStoreSegment() { return docStoreSegment; } - - public void setDocStoreSegment(String segment) { - docStoreSegment = segment; - } - + + @Deprecated void setDocStoreOffset(int offset) { docStoreOffset = offset; clearFilesCache(); } - void setDocStore(int offset, String segment, boolean isCompoundFile) { - docStoreOffset = offset; - docStoreSegment = segment; - docStoreIsCompoundFile = isCompoundFile; - clearFilesCache(); + @Deprecated + public void setDocStoreSegment(String docStoreSegment) { + this.docStoreSegment = docStoreSegment; } - + /** Save this segment's info. */ public void write(IndexOutput output) throws IOException { @@ -507,12 +516,14 @@ output.writeString(name); output.writeInt(docCount); output.writeLong(delGen); + output.writeInt(docStoreOffset); if (docStoreOffset != -1) { output.writeString(docStoreSegment); output.writeByte((byte) (docStoreIsCompoundFile ? 1:0)); } + if (normGen == null) { output.writeInt(NO); } else { @@ -522,7 +533,7 @@ output.writeLong(entry.getValue()); } } - + output.writeByte((byte) (isCompoundFile ? YES : NO)); output.writeInt(delCount); output.writeByte((byte) (hasProx ? 1:0)); @@ -570,9 +581,9 @@ // Already cached: return files; } - + Set fileSet = new HashSet(); - + boolean useCompoundFile = getUseCompoundFile(); if (useCompoundFile) { @@ -606,7 +617,7 @@ fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION)); fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION)); - } + } } String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); @@ -644,7 +655,7 @@ } /** Used for debugging. Format may suddenly change. - * + * *

Current format looks like * _a(3.1):c45/4->_1, which means the segment's * name is _a; it was created with Lucene 3.1 (or @@ -674,7 +685,7 @@ if (delCount != 0) { s.append('/').append(delCount); } - + if (docStoreOffset != -1) { s.append("->").append(docStoreSegment); if (docStoreIsCompoundFile) { @@ -714,13 +725,13 @@ * NOTE: this method is used for internal purposes only - you should * not modify the version of a SegmentInfo, or it may result in unexpected * exceptions thrown when you attempt to open the index. - * + * * @lucene.internal */ public void setVersion(String version) { this.version = version; } - + /** Returns the version of the code which wrote the segment. */ public String getVersion() { return version; Index: lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (working copy) @@ -81,6 +81,6 @@ } public int getAddress() { - return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); + return upto + (offset0 & DocumentsWriterPerThread.BYTE_BLOCK_NOT_MASK); } } \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (working copy) @@ -17,21 +17,23 @@ * limitations under the License. */ -import java.util.Collection; +import java.io.IOException; import java.util.Map; -import java.io.IOException; abstract class InvertedDocConsumer { - /** Add a new thread */ - abstract InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread); - /** Abort (called after hitting AbortException) */ abstract void abort(); /** Flush a new segment */ - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; + abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + + abstract void startDocument() throws IOException; + + abstract void finishDocument() throws IOException; + /** Attempt to free RAM, returning true if any RAM was * freed */ abstract boolean freeRAM(); Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (working copy) @@ -18,22 +18,25 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.Map; abstract class DocFieldConsumer { /** Called when DocumentsWriter decides to create a new * segment */ - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; /** Called when an aborting exception is hit */ abstract void abort(); - /** Add a new thread */ - abstract DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException; - /** Called when DocumentsWriter is using too much RAM. * The consumer should free RAM, if possible, returning * true if any RAM was in fact freed. 
*/ abstract boolean freeRAM(); - } + + abstract void startDocument() throws IOException; + + abstract DocFieldConsumerPerField addField(FieldInfo fi); + + abstract void finishDocument() throws IOException; + +} Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (working copy) @@ -24,4 +24,5 @@ /** Processes all occurrences of a single field */ abstract void processFields(Fieldable[] fields, int count) throws IOException; abstract void abort(); + abstract FieldInfo getFieldInfo(); } Index: lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -1,307 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Comparator; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.io.IOException; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -/** - * Gathers all Fieldables for a document under the same - * name, updates FieldInfos, and calls per-field consumers - * to process field by field. - * - * Currently, only a single thread visits the fields, - * sequentially, for processing. 
- */ - -final class DocFieldProcessorPerThread extends DocConsumerPerThread { - - float docBoost; - int fieldGen; - final DocFieldProcessor docFieldProcessor; - final DocFieldConsumerPerThread consumer; - - // Holds all fields seen in current doc - DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; - int fieldCount; - - // Hash table for all fields seen in current segment - DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; - int hashMask = 1; - int totalFieldCount; - - final StoredFieldsWriterPerThread fieldsWriter; - - final DocumentsWriter.DocState docState; - - public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException { - this.docState = threadState.docState; - this.docFieldProcessor = docFieldProcessor; - this.consumer = docFieldProcessor.consumer.addThread(this); - fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState); - } - - @Override - public void abort() { - for(int i=0;i fields() { - Collection fields = new HashSet(); - for(int i=0;i fieldHash.length; - - final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize]; - - // Rehash - int newHashMask = newHashSize-1; - for(int j=0;j docFields = doc.getFields(); - final int numDocFields = docFields.size(); - - // Absorb any new fields first seen in this document. - // Also absorb any changes to fields we had already - // seen before (eg suddenly turning on norms or - // vectors, etc.): - - for(int i=0;i= fieldHash.length/2) - rehash(); - } else { - fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), - field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false, field.getOmitTermFreqAndPositions()); - } - if (thisFieldGen != fp.lastGen) { - - // First time we're seeing this field for this doc - fp.fieldCount = 0; - - if (fieldCount == fields.length) { - final int newSize = fields.length*2; - DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize]; - System.arraycopy(fields, 0, newArray, 0, fieldCount); - fields = newArray; - } - - fields[fieldCount++] = fp; - fp.lastGen = thisFieldGen; - } - - if (fp.fieldCount == fp.fields.length) { - Fieldable[] newArray = new Fieldable[fp.fields.length*2]; - System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount); - fp.fields = newArray; - } - - fp.fields[fp.fieldCount++] = field; - if (field.isStored()) { - fieldsWriter.addField(field, fp.fieldInfo); - } - } - - // If we are writing vectors then we must visit - // fields in sorted order so they are written in - // sorted order. TODO: we actually only need to - // sort the subset of fields that have vectors - // enabled; we could save [small amount of] CPU - // here. 
- ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); - - for(int i=0;i fieldsComp = new Comparator() { - public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) { - return o1.fieldInfo.name.compareTo(o2.fieldInfo.name); - } - }; - - PerDoc[] docFreeList = new PerDoc[1]; - int freeCount; - int allocCount; - - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else - return docFreeList[--freeCount]; - } - - synchronized void freePerDoc(PerDoc perDoc) { - assert freeCount < docFreeList.length; - docFreeList[freeCount++] = perDoc; - } - - class PerDoc extends DocumentsWriter.DocWriter { - - DocumentsWriter.DocWriter one; - DocumentsWriter.DocWriter two; - - @Override - public long sizeInBytes() { - return one.sizeInBytes() + two.sizeInBytes(); - } - - @Override - public void finish() throws IOException { - try { - try { - one.finish(); - } finally { - two.finish(); - } - } finally { - freePerDoc(this); - } - } - - @Override - public void abort() { - try { - try { - one.abort(); - } finally { - two.abort(); - } - } finally { - freePerDoc(this); - } - } - } -} \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (working copy) @@ -17,12 +17,13 @@ * limitations under the License. 
*/ -import java.util.Collection; +import java.io.IOException; import java.util.Map; -import java.io.IOException; abstract class InvertedDocEndConsumer { - abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread); - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; abstract void abort(); + abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + abstract void startDocument() throws IOException; + abstract void finishDocument() throws IOException; } Index: lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java (working copy) @@ -18,11 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.Map; abstract class TermsHashConsumer { - abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread); - abstract void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException; abstract void abort(); - } + abstract void startDocument() throws IOException; + abstract void finishDocument(TermsHash termsHash) throws IOException; + abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); +} Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java =================================================================== --- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (working copy) @@ -75,7 +75,7 @@ protected double noCFSRatio = DEFAULT_NO_CFS_RATIO; protected boolean calibrateSizeByDeletes = true; - + protected boolean useCompoundFile = true; public LogMergePolicy() { @@ -103,7 +103,7 @@ } this.noCFSRatio = noCFSRatio; } - + protected void message(String message) { if (verbose()) writer.get().message("LMP: " + message); @@ -169,7 +169,7 @@ this.calibrateSizeByDeletes = calibrateSizeByDeletes; } - /** Returns true if the segment size should be calibrated + /** Returns true if the segment size should be calibrated * by the number of deletes when choosing segments for merge. */ public boolean getCalibrateSizeByDeletes() { return calibrateSizeByDeletes; @@ -189,7 +189,7 @@ return info.docCount; } } - + protected long sizeBytes(SegmentInfo info) throws IOException { long byteSize = info.sizeInBytes(true); if (calibrateSizeByDeletes) { @@ -201,7 +201,7 @@ return byteSize; } } - + protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set segmentsToOptimize) throws IOException { final int numSegments = infos.size(); int numToOptimize = 0; @@ -273,7 +273,7 @@ return spec.merges.size() == 0 ? null : spec; } - + /** * Returns the merges necessary to optimize the index. 
This method constraints * the returned merges only by the {@code maxNumSegments} parameter, and @@ -281,7 +281,7 @@ */ private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException { MergeSpecification spec = new MergeSpecification(); - + // First, enroll all "full" merges (size // mergeFactor) to potentially be run concurrently: while (last - maxNumSegments + 1 >= mergeFactor) { @@ -331,7 +331,7 @@ } return spec.merges.size() == 0 ? null : spec; } - + /** Returns the merges necessary to optimize the index. * This merge policy defines "optimized" to mean only the * requested number of segments is left in the index, and @@ -379,7 +379,7 @@ } return null; } - + // There is only one segment already, and it is optimized if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) { if (verbose()) { @@ -397,7 +397,7 @@ break; } } - + if (anyTooLarge) { return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last); } else { @@ -409,7 +409,7 @@ * Finds merges necessary to expunge all deletes from the * index. We simply merge adjacent segments that have * deletes, up to mergeFactor at a time. - */ + */ @Override public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) throws CorruptIndexException, IOException { @@ -462,7 +462,7 @@ SegmentInfo info; float level; int index; - + public SegmentInfoAndLevel(SegmentInfo info, float level, int index) { this.info = info; this.level = level; @@ -658,5 +658,5 @@ sb.append("]"); return sb.toString(); } - + } Index: lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (working copy) @@ -1,79 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.document.Fieldable; - -final class StoredFieldsWriterPerThread { - - final FieldsWriter localFieldsWriter; - final StoredFieldsWriter storedFieldsWriter; - final DocumentsWriter.DocState docState; - - StoredFieldsWriter.PerDoc doc; - - public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException { - this.storedFieldsWriter = storedFieldsWriter; - this.docState = docState; - localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null); - } - - public void startDocument() { - if (doc != null) { - // Only happens if previous document hit non-aborting - // exception while writing stored fields into - // localFieldsWriter: - doc.reset(); - doc.docID = docState.docID; - } - } - - public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { - if (doc == null) { - doc = storedFieldsWriter.getPerDoc(); - doc.docID = docState.docID; - localFieldsWriter.setFieldsStream(doc.fdt); - assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields; - assert 0 == doc.fdt.length(); - assert 0 == doc.fdt.getFilePointer(); - } - - localFieldsWriter.writeField(fieldInfo, field); - assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); - doc.numStoredFields++; - } - - public DocumentsWriter.DocWriter finishDocument() { - // If there were any stored fields in this doc, doc will - // be non-null; else it's null. - try { - return doc; - } finally { - doc = null; - } - } - - public void abort() { - if (doc != null) { - doc.abort(); - doc = null; - } - } -} Index: lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (working copy) @@ -18,7 +18,8 @@ */ import java.io.IOException; -import org.apache.lucene.store.RAMOutputStream; + +import org.apache.lucene.document.Fieldable; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -26,22 +27,38 @@ final class StoredFieldsWriter { FieldsWriter fieldsWriter; - final DocumentsWriter docWriter; + final DocumentsWriterPerThread docWriter; int lastDocID; - PerDoc[] docFreeList = new PerDoc[1]; int freeCount; - public StoredFieldsWriter(DocumentsWriter docWriter) { + final DocumentsWriterPerThread.DocState docState; + + public StoredFieldsWriter(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; + this.docState = docWriter.docState; } - public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException { - return new StoredFieldsWriterPerThread(docState, this); + private int numStoredFields; + private Fieldable[] storedFields; + private int[] fieldNumbers; + + public void reset() { + numStoredFields = 0; + storedFields = new Fieldable[1]; + fieldNumbers = new int[1]; } - synchronized public void flush(SegmentWriteState state) throws IOException { - if (state.numDocs > lastDocID) { + public void startDocument() { + reset(); + } + + public void flush(SegmentWriteState state) throws IOException { + + if (state.numDocs > 0) { + // It's possible that all documents seen in this segment + // hit non-aborting exceptions, in which case we will + // not have yet init'd the FieldsWriter: initFieldsWriter(); 
fill(state.numDocs); } @@ -67,23 +84,9 @@ int allocCount; - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else { - return docFreeList[--freeCount]; - } - } + void abort() { + reset(); - synchronized void abort() { if (fieldsWriter != null) { fieldsWriter.abort(); fieldsWriter = null; @@ -101,53 +104,40 @@ } } - synchronized void finishDocument(PerDoc perDoc) throws IOException { + void finishDocument() throws IOException { assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start"); + initFieldsWriter(); + fill(docState.docID); - fill(perDoc.docID); + if (fieldsWriter != null && numStoredFields > 0) { + fieldsWriter.startDocument(numStoredFields); + for (int i = 0; i < numStoredFields; i++) { + fieldsWriter.writeField(fieldNumbers[i], storedFields[i]); + } + lastDocID++; + } - // Append stored fields to the real FieldsWriter: - fieldsWriter.flushDocument(perDoc.numStoredFields, perDoc.fdt); - lastDocID++; - perDoc.reset(); - free(perDoc); + reset(); assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end"); } - synchronized void free(PerDoc perDoc) { - assert freeCount < docFreeList.length; - assert 0 == perDoc.numStoredFields; - assert 0 == perDoc.fdt.length(); - assert 0 == perDoc.fdt.getFilePointer(); - docFreeList[freeCount++] = perDoc; - } - - class PerDoc extends DocumentsWriter.DocWriter { - final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer(); - RAMOutputStream fdt = new RAMOutputStream(buffer); - int numStoredFields; - - void reset() { - fdt.reset(); - buffer.recycle(); - numStoredFields = 0; + public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { + if (numStoredFields == storedFields.length) { + int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + Fieldable[] newArray = new Fieldable[newSize]; + System.arraycopy(storedFields, 0, newArray, 0, numStoredFields); + storedFields = newArray; } - @Override - void abort() { - reset(); - free(this); + if (numStoredFields == fieldNumbers.length) { + fieldNumbers = ArrayUtil.grow(fieldNumbers); } - @Override - public long sizeInBytes() { - return buffer.getSizeInBytes(); - } + storedFields[numStoredFields] = field; + fieldNumbers[numStoredFields] = fieldInfo.number; + numStoredFields++; - @Override - public void finish() throws IOException { - finishDocument(this); - } + assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); } } Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -abstract class DocFieldConsumerPerThread { - abstract void startDocument() throws IOException; - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract DocFieldConsumerPerField addField(FieldInfo fi); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/IntBlockPool.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IntBlockPool.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/IntBlockPool.java (working copy) @@ -1,5 +1,7 @@ package org.apache.lucene.index; +import java.util.Arrays; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -22,24 +24,24 @@ public int[][] buffers = new int[10][]; int bufferUpto = -1; // Which buffer we are upto - public int intUpto = DocumentsWriter.INT_BLOCK_SIZE; // Where we are in head buffer + public int intUpto = DocumentsWriterPerThread.INT_BLOCK_SIZE; // Where we are in head buffer public int[] buffer; // Current head buffer - public int intOffset = -DocumentsWriter.INT_BLOCK_SIZE; // Current head offset + public int intOffset = -DocumentsWriterPerThread.INT_BLOCK_SIZE; // Current head offset - final private DocumentsWriter docWriter; + final private DocumentsWriterPerThread docWriter; - public IntBlockPool(DocumentsWriter docWriter) { + public IntBlockPool(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; } public void reset() { if (bufferUpto != -1) { - if (bufferUpto > 0) - // Recycle all but the first buffer - docWriter.recycleIntBlocks(buffers, 1, 1+bufferUpto); - // Reuse first buffer + if (bufferUpto > 0) { + docWriter.recycleIntBlocks(buffers, 1, bufferUpto-1); + Arrays.fill(buffers, 1, bufferUpto, null); + } bufferUpto = 0; intUpto = 0; intOffset = 0; @@ -57,7 +59,7 @@ bufferUpto++; intUpto = 0; - intOffset += DocumentsWriter.INT_BLOCK_SIZE; + intOffset += DocumentsWriterPerThread.INT_BLOCK_SIZE; } } Index: lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java (working copy) @@ -1,34 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -abstract class DocConsumerPerThread { - - /** Process the document. If there is - * something for this document to be done in docID order, - * you should encapsulate that as a - * DocumentsWriter.DocWriter and return it. - * DocumentsWriter then calls finish() on this object - * when it's its turn. */ - abstract DocumentsWriter.DocWriter processDocument(FieldInfos fieldInfos) throws IOException; - - abstract void doAfterFlush(); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java (working copy) @@ -1,115 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import java.util.Comparator; - -import org.apache.lucene.util.ByteBlockPool; -import org.apache.lucene.util.BytesRef; - -import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray; - -// TODO FI: some of this is "generic" to TermsHash* so we -// should factor it out so other consumers don't have to -// duplicate this code - -/** Used by DocumentsWriter to merge the postings from - * multiple ThreadStates when creating a segment */ -final class FreqProxFieldMergeState { - - final FreqProxTermsWriterPerField field; - final int numPostings; - private final ByteBlockPool bytePool; - final int[] termIDs; - final FreqProxPostingsArray postings; - int currentTermID; - - final BytesRef text = new BytesRef(); - - private int postingUpto = -1; - - final ByteSliceReader freq = new ByteSliceReader(); - final ByteSliceReader prox = new ByteSliceReader(); - - int docID; - int termFreq; - - public FreqProxFieldMergeState(FreqProxTermsWriterPerField field, Comparator termComp) { - this.field = field; - this.numPostings = field.termsHashPerField.bytesHash.size(); - this.bytePool = field.perThread.termsHashPerThread.bytePool; - this.termIDs = field.termsHashPerField.sortPostings(termComp); - this.postings = (FreqProxPostingsArray) field.termsHashPerField.postingsArray; - } - - boolean nextTerm() throws IOException { - postingUpto++; - if (postingUpto == numPostings) { - return false; - } - - currentTermID = termIDs[postingUpto]; - docID = 0; - - // Get BytesRef - final int textStart = postings.textStarts[currentTermID]; - bytePool.setBytesRef(text, textStart); - - field.termsHashPerField.initReader(freq, currentTermID, 0); - if (!field.fieldInfo.omitTermFreqAndPositions) { - field.termsHashPerField.initReader(prox, currentTermID, 1); - } - - // Should always be true - boolean result = nextDoc(); - assert result; - - return true; - } - - public boolean nextDoc() throws IOException { - if (freq.eof()) { - if (postings.lastDocCodes[currentTermID] != -1) { - // Return last doc - docID = postings.lastDocIDs[currentTermID]; - if (!field.omitTermFreqAndPositions) - termFreq = postings.docFreqs[currentTermID]; - postings.lastDocCodes[currentTermID] = -1; - return true; - } else - // EOF - return false; - } - - final int code = freq.readVInt(); - if (field.omitTermFreqAndPositions) - docID += code; - else { - docID += code >>> 1; - if ((code & 1) != 0) - termFreq = 1; - else - termFreq = freq.readVInt(); - } - - assert docID != postings.lastDocIDs[currentTermID]; - - return true; - } -} Index: lucene/src/java/org/apache/lucene/index/TermsHash.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHash.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/TermsHash.java (working copy) @@ -18,12 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; import java.util.Map; +import org.apache.lucene.util.ByteBlockPool; +import org.apache.lucene.util.BytesRef; + /** This class implements {@link InvertedDocConsumer}, which * is passed each token produced by the analyzer on each * field. 
It stores these tokens in a hash table, and @@ -36,78 +36,118 @@ final TermsHashConsumer consumer; final TermsHash nextTermsHash; - final DocumentsWriter docWriter; + final DocumentsWriterPerThread docWriter; - boolean trackAllocations; + final IntBlockPool intPool; + final ByteBlockPool bytePool; + ByteBlockPool termBytePool; - public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) { + final boolean primary; + final DocumentsWriterPerThread.DocState docState; + + // Used when comparing postings via termRefComp, in TermsHashPerField + final BytesRef tr1 = new BytesRef(); + final BytesRef tr2 = new BytesRef(); + + // Used by perField to obtain terms from the analysis chain + final BytesRef termBytesRef = new BytesRef(10); + + final boolean trackAllocations; + + + public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) { + this.docState = docWriter.docState; this.docWriter = docWriter; this.consumer = consumer; + this.trackAllocations = trackAllocations; this.nextTermsHash = nextTermsHash; - this.trackAllocations = trackAllocations; + intPool = new IntBlockPool(docWriter); + bytePool = new ByteBlockPool(docWriter.byteBlockAllocator); + + if (nextTermsHash != null) { + // We are primary + primary = true; + termBytePool = bytePool; + nextTermsHash.termBytePool = bytePool; + } else { + primary = false; + } } @Override - InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { - return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null); + public void abort() { + reset(); + try { + consumer.abort(); + } finally { + if (nextTermsHash != null) { + nextTermsHash.abort(); + } + } } - TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) { - return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread); - } + // Clear all state + void reset() { + intPool.reset(); + bytePool.reset(); - @Override - public void abort() { - consumer.abort(); - if (nextTermsHash != null) - nextTermsHash.abort(); + if (primary) { + bytePool.reset(); + } } @Override - synchronized void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - Map> nextThreadsAndFields; + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { + Map childFields = new HashMap(); + Map nextChildFields; - if (nextTermsHash != null) - nextThreadsAndFields = new HashMap>(); - else - nextThreadsAndFields = null; + if (nextTermsHash != null) { + nextChildFields = new HashMap(); + } else { + nextChildFields = null; + } - for (final Map.Entry> entry : threadsAndFields.entrySet()) { + for (final Map.Entry entry : fieldsToFlush.entrySet()) { + TermsHashPerField perField = (TermsHashPerField) entry.getValue(); + childFields.put(entry.getKey(), perField.consumer); + if (nextTermsHash != null) { + nextChildFields.put(entry.getKey(), perField.nextPerField); + } + } - TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey(); + consumer.flush(childFields, state); - Collection fields = entry.getValue(); + if (nextTermsHash != null) { + nextTermsHash.flush(nextChildFields, state); + } + } - Iterator fieldsIt = fields.iterator(); - Collection childFields = new HashSet(); - Collection nextChildFields; + @Override + InvertedDocConsumerPerField 
addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { + return new TermsHashPerField(docInverterPerField, this, nextTermsHash, fieldInfo); + } - if (nextTermsHash != null) - nextChildFields = new HashSet(); - else - nextChildFields = null; + @Override + public boolean freeRAM() { + return false; + } - while(fieldsIt.hasNext()) { - TermsHashPerField perField = (TermsHashPerField) fieldsIt.next(); - childFields.add(perField.consumer); - if (nextTermsHash != null) - nextChildFields.add(perField.nextPerField); + @Override + void finishDocument() throws IOException { + try { + consumer.finishDocument(this); + } finally { + if (nextTermsHash != null) { + nextTermsHash.consumer.finishDocument(nextTermsHash); } - - childThreadsAndFields.put(perThread.consumer, childFields); - if (nextTermsHash != null) - nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields); } - - consumer.flush(childThreadsAndFields, state); - - if (nextTermsHash != null) - nextTermsHash.flush(nextThreadsAndFields, state); } @Override - synchronized public boolean freeRAM() { - return false; + void startDocument() throws IOException { + consumer.startDocument(); + if (nextTermsHash != null) { + nextTermsHash.consumer.startDocument(); + } } } Index: lucene/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -39,24 +39,24 @@ /** * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, - * into a single Segment. After adding the appropriate readers, call the merge method to combine the + * into a single Segment. After adding the appropriate readers, call the merge method to combine the * segments. 
- * + * * @see #merge * @see #add */ final class SegmentMerger { - + /** norms header placeholder */ - static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; - + static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; + private Directory directory; private String segment; private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; private List readers = new ArrayList(); private final FieldInfos fieldInfos; - + private int mergedDocs; private final MergeState.CheckAbort checkAbort; @@ -64,13 +64,13 @@ /** Maximum number of contiguous documents to bulk-copy when merging stored fields */ private final static int MAX_RAW_MERGE_DOCS = 4192; - + private final CodecProvider codecs; private Codec codec; private SegmentWriteState segmentWriteState; private PayloadProcessorProvider payloadProcessorProvider; - + SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) { this.payloadProcessorProvider = payloadProcessorProvider; directory = dir; @@ -135,10 +135,10 @@ for (String file : files) { cfsWriter.addFile(file); } - + // Perform the merge cfsWriter.close(); - + return files; } @@ -196,13 +196,12 @@ } /** - * + * * @return The number of documents in all of the readers * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ private int mergeFields() throws CorruptIndexException, IOException { - for (IndexReader reader : readers) { if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; @@ -265,7 +264,7 @@ throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption"); segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null); - + return docCount; } @@ -283,7 +282,7 @@ ++j; continue; } - // We can optimize this case (doing a bulk byte copy) since the field + // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { @@ -295,7 +294,7 @@ break; } } while(numDocs < MAX_RAW_MERGE_DOCS); - + IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs); fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; @@ -349,7 +348,7 @@ * @throws IOException */ private final void mergeVectors() throws IOException { - TermVectorsWriter termVectorsWriter = + TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos); try { @@ -369,7 +368,7 @@ copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader); } else { copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader); - + } } } finally { @@ -402,7 +401,7 @@ ++docNum; continue; } - // We can optimize this case (doing a bulk byte copy) since the field + // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = docNum, numDocs = 0; do { @@ -414,7 +413,7 @@ break; } } while(numDocs < MAX_RAW_MERGE_DOCS); - + matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); checkAbort.work(300 * numDocs); @@ 
-425,7 +424,7 @@ // skip deleted docs continue; } - + // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); @@ -434,7 +433,7 @@ } } } - + private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter, final TermVectorsReader matchingVectorsReader, final IndexReader reader) @@ -470,7 +469,7 @@ // Let CodecProvider decide which codec will be used to write // the new segment: - + int docBase = 0; final List fields = new ArrayList(); @@ -498,7 +497,7 @@ mergeState.readerCount = readers.size(); mergeState.fieldInfos = fieldInfos; mergeState.mergedDocCount = mergedDocs; - + // Remap docIDs mergeState.delCounts = new int[mergeState.readerCount]; mergeState.docMaps = new int[mergeState.readerCount][]; @@ -536,7 +535,7 @@ } assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount; } - + if (payloadProcessorProvider != null) { mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory()); } @@ -549,7 +548,7 @@ // apart when we step through the docs enums in // MultiDocsEnum. mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts); - + try { consumer.merge(mergeState, new MultiFields(fields.toArray(Fields.EMPTY_ARRAY), @@ -568,7 +567,7 @@ int[] getDelCounts() { return mergeState.delCounts; } - + public boolean getAnyNonBulkMerges() { assert matchedCount <= readers.size(); return matchedCount != readers.size(); @@ -579,7 +578,7 @@ try { for (FieldInfo fi : fieldInfos) { if (fi.isIndexed && !fi.omitNorms) { - if (output == null) { + if (output == null) { output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION)); output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); } @@ -610,7 +609,7 @@ } } } finally { - if (output != null) { + if (output != null) { output.close(); } } Index: lucene/src/java/org/apache/lucene/index/TermsHashPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (working copy) @@ -34,9 +34,10 @@ final TermsHashConsumerPerField consumer; + final TermsHash termsHash; + final TermsHashPerField nextPerField; - final TermsHashPerThread perThread; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; TermToBytesRefAttribute termAtt; BytesRef termBytesRef; @@ -52,27 +53,27 @@ final FieldInfo fieldInfo; final BytesRefHash bytesHash; - + ParallelPostingsArray postingsArray; private final AtomicLong bytesUsed; - public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) { - this.perThread = perThread; - intPool = perThread.intPool; - bytePool = perThread.bytePool; - termBytePool = perThread.termBytePool; - docState = perThread.docState; - bytesUsed = perThread.termsHash.trackAllocations?perThread.termsHash.docWriter.bytesUsed:new AtomicLong(); - + public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHash termsHash, final TermsHash nextTermsHash, final FieldInfo fieldInfo) { + intPool = termsHash.intPool; + bytePool = termsHash.bytePool; + termBytePool = termsHash.termBytePool; + docState = 
termsHash.docState; + this.termsHash = termsHash; + bytesUsed = termsHash.trackAllocations ? termsHash.docWriter.bytesUsed + : new AtomicLong(); fieldState = docInverterPerField.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); + this.consumer = termsHash.consumer.addField(this, fieldInfo); PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed); - bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); + bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); streamCount = consumer.getStreamCount(); numPostingInt = 2*streamCount; this.fieldInfo = fieldInfo; - if (nextPerThread != null) - nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo); + if (nextTermsHash != null) + nextPerField = (TermsHashPerField) nextTermsHash.addField(docInverterPerField, fieldInfo); else nextPerField = null; } @@ -80,7 +81,7 @@ void shrinkHash(int targetSize) { // Fully free the bytesHash on each flush but keep the pool untouched // bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too - bytesHash.clear(false); + bytesHash.clear(false); } public void reset() { @@ -90,7 +91,7 @@ } @Override - synchronized public void abort() { + public void abort() { reset(); if (nextPerField != null) nextPerField.abort(); @@ -99,14 +100,13 @@ public void initReader(ByteSliceReader reader, int termID, int stream) { assert stream < streamCount; int intStart = postingsArray.intStarts[termID]; - final int[] ints = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - final int upto = intStart & DocumentsWriter.INT_BLOCK_MASK; + final int[] ints = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + final int upto = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; reader.init(bytePool, postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE, ints[upto+stream]); } - /** Collapse the hash table & sort in-place. */ public int[] sortPostings(Comparator termComp) { return bytesHash.sort(termComp); @@ -124,7 +124,7 @@ nextPerField.start(f); } } - + @Override boolean start(Fieldable[] fields, int count) throws IOException { doCall = consumer.start(fields, count); @@ -143,11 +143,10 @@ // First time we are seeing this token since we last // flushed the hash. 
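The initReader and add code in this hunk locates a term's stream pointers by splitting intStart into a buffer index (intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT) and an offset within that buffer (intStart & DocumentsWriterPerThread.INT_BLOCK_MASK). A minimal, self-contained sketch of that power-of-two block addressing follows; the shift value 13 comes from the INT_BLOCK_* constants visible later in this patch in the old DocumentsWriter, while the pool class itself is a hypothetical stand-in, not the patch's code.

    // Sketch of power-of-two block-pool addressing as used by initReader/add.
    // IntBlockPoolSketch is hypothetical; only the shift/mask math mirrors the patch.
    public class IntBlockPoolSketch {
      static final int INT_BLOCK_SHIFT = 13;
      static final int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT;  // 8192 ints per buffer
      static final int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;

      private int[][] buffers = new int[1][];
      private int bufferCount = 0;
      private int upto = INT_BLOCK_SIZE;  // force allocation of the first buffer

      // Append a value and return its global "intStart" address.
      int add(int value) {
        if (upto == INT_BLOCK_SIZE) {           // current buffer full: allocate the next one
          if (bufferCount == buffers.length) {
            buffers = java.util.Arrays.copyOf(buffers, buffers.length * 2);
          }
          buffers[bufferCount++] = new int[INT_BLOCK_SIZE];
          upto = 0;
        }
        final int globalAddress = ((bufferCount - 1) << INT_BLOCK_SHIFT) + upto;
        buffers[bufferCount - 1][upto++] = value;
        return globalAddress;
      }

      // Resolve a global address the way initReader does:
      // buffer = address >> SHIFT, offset = address & MASK.
      int get(int intStart) {
        return buffers[intStart >> INT_BLOCK_SHIFT][intStart & INT_BLOCK_MASK];
      }

      public static void main(String[] args) {
        IntBlockPoolSketch pool = new IntBlockPoolSketch();
        int addr = -1;
        for (int i = 0; i < 10000; i++) {       // spills into a second buffer
          addr = pool.add(i);
        }
        System.out.println(pool.get(addr));     // prints 9999
      }
    }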
// Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) + if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) intPool.nextBuffer(); - if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) - bytePool.nextBuffer(); + if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) bytePool.nextBuffer(); intUptos = intPool.buffer; intUptoStart = intPool.intUpto; @@ -166,8 +165,8 @@ } else { termID = (-termID)-1; int intStart = postingsArray.intStarts[termID]; - intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK; + intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; consumer.addTerm(termID); } } @@ -192,7 +191,7 @@ if (docState.maxTermPrefix == null) { final int saved = termBytesRef.length; try { - termBytesRef.length = Math.min(30, DocumentsWriter.MAX_TERM_LENGTH_UTF8); + termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8); docState.maxTermPrefix = termBytesRef.toString(); } finally { termBytesRef.length = saved; @@ -204,7 +203,7 @@ if (termID >= 0) {// New posting bytesHash.byteStart(termID); // Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) { + if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) { intPool.nextBuffer(); } @@ -229,8 +228,8 @@ } else { termID = (-termID)-1; final int intStart = postingsArray.intStarts[termID]; - intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK; + intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; consumer.addTerm(termID); } @@ -278,7 +277,7 @@ if (nextPerField != null) nextPerField.finish(); } - + private static final class PostingsBytesStartArray extends BytesStartArray { private final TermsHashPerField perField; @@ -289,10 +288,10 @@ this.perField = perField; this.bytesUsed = bytesUsed; } - + @Override public int[] init() { - if(perField.postingsArray == null) { + if(perField.postingsArray == null) { perField.postingsArray = perField.consumer.createPostingsArray(2); bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); } @@ -312,7 +311,7 @@ @Override public int[] clear() { if(perField.postingsArray != null) { - bytesUsed.addAndGet(-perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); + bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting())); perField.postingsArray = null; } return null; Index: lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (working copy) @@ -19,10 +19,15 @@ import java.io.IOException; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; -import java.util.HashMap; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; + /** * This is a DocConsumer that gathers all fields under the * same name, and 
calls per-field consumers to process field @@ -33,26 +38,39 @@ final class DocFieldProcessor extends DocConsumer { - final DocumentsWriter docWriter; final DocFieldConsumer consumer; final StoredFieldsWriter fieldsWriter; - public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) { - this.docWriter = docWriter; + // Holds all fields seen in current doc + DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; + int fieldCount; + + // Hash table for all fields ever seen + DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; + int hashMask = 1; + int totalFieldCount; + + float docBoost; + int fieldGen; + final DocumentsWriterPerThread.DocState docState; + + public DocFieldProcessor(DocumentsWriterPerThread docWriter, DocFieldConsumer consumer) { + this.docState = docWriter.docState; this.consumer = consumer; fieldsWriter = new StoredFieldsWriter(docWriter); } @Override - public void flush(Collection threads, SegmentWriteState state) throws IOException { + public void flush(SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - for ( DocConsumerPerThread thread : threads) { - DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread; - childThreadsAndFields.put(perThread.consumer, perThread.fields()); + Map childFields = new HashMap(); + Collection fields = fields(); + for (DocFieldConsumerPerField f : fields) { + childFields.put(f.getFieldInfo(), f); } + fieldsWriter.flush(state); - consumer.flush(childThreadsAndFields, state); + consumer.flush(childFields, state); // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, @@ -64,8 +82,20 @@ @Override public void abort() { - fieldsWriter.abort(); - consumer.abort(); + for(int i=0;i fields() { + Collection fields = new HashSet(); + for(int i=0;i fieldHash.length; + + final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize]; + + // Rehash + int newHashMask = newHashSize-1; + for(int j=0;j docFields = doc.getFields(); + final int numDocFields = docFields.size(); + + // Absorb any new fields first seen in this document. + // Also absorb any changes to fields we had already + // seen before (eg suddenly turning on norms or + // vectors, etc.): + + for(int i=0;i= fieldHash.length/2) + rehash(); + } else { + fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), + field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), + field.getOmitNorms(), false, field.getOmitTermFreqAndPositions()); + } + + if (thisFieldGen != fp.lastGen) { + + // First time we're seeing this field for this doc + fp.fieldCount = 0; + + if (fieldCount == fields.length) { + final int newSize = fields.length*2; + DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize]; + System.arraycopy(fields, 0, newArray, 0, fieldCount); + fields = newArray; + } + + fields[fieldCount++] = fp; + fp.lastGen = thisFieldGen; + } + + fp.addField(field); + + if (field.isStored()) { + fieldsWriter.addField(field, fp.fieldInfo); + } + } + + // If we are writing vectors then we must visit + // fields in sorted order so they are written in + // sorted order. TODO: we actually only need to + // sort the subset of fields that have vectors + // enabled; we could save [small amount of] CPU + // here. 
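DocFieldProcessor in this hunk keeps every field it has ever seen in a small chained hash indexed by fieldName.hashCode() & hashMask, doubles the table via rehash() once totalFieldCount reaches half its length, and stamps entries with a per-document fieldGen so the first occurrence of a field within a document is detected cheaply; the quickSort call that follows then orders the current document's fields by name so term vectors are written in sorted field order. A standalone sketch of that bookkeeping, with hypothetical class and member names rather than the patch's, is included here.

    // Sketch of the field hash plus per-document generation bookkeeping above.
    import java.util.ArrayList;
    import java.util.List;

    public class FieldHashSketch {

      static final class FieldEntry {
        final String name;
        FieldEntry next;        // hash-chain link
        long lastGen = -1;      // last document generation that saw this field
        FieldEntry(String name) { this.name = name; }
      }

      private FieldEntry[] hash = new FieldEntry[2];
      private int hashMask = 1;
      private int totalFields;
      private long docGen;

      // Fields seen in the current document, later sorted by name for vectors.
      private final List<FieldEntry> docFields = new ArrayList<>();

      void startDocument() {
        docGen++;
        docFields.clear();
      }

      void addField(String name) {
        final int pos = name.hashCode() & hashMask;
        FieldEntry fp = hash[pos];
        while (fp != null && !fp.name.equals(name)) {
          fp = fp.next;                             // walk the chain
        }
        if (fp == null) {
          fp = new FieldEntry(name);
          fp.next = hash[pos];
          hash[pos] = fp;
          if (++totalFields >= hash.length / 2) {   // same growth trigger as the patch
            rehash();
          }
        }
        if (fp.lastGen != docGen) {                 // first occurrence in this document
          fp.lastGen = docGen;
          docFields.add(fp);
        }
      }

      // Double to the next power of two and re-chain every entry.
      private void rehash() {
        final int newSize = hash.length * 2;
        final FieldEntry[] newHash = new FieldEntry[newSize];
        final int newMask = newSize - 1;
        for (int i = 0; i < hash.length; i++) {
          FieldEntry e = hash[i];
          while (e != null) {
            final FieldEntry next = e.next;
            final int slot = e.name.hashCode() & newMask;
            e.next = newHash[slot];
            newHash[slot] = e;
            e = next;
          }
        }
        hash = newHash;
        hashMask = newMask;
      }

      // The current document's fields in name order (what quickSort achieves).
      List<FieldEntry> sortedDocFields() {
        docFields.sort((a, b) -> a.name.compareTo(b.name));
        return docFields;
      }
    }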
+ quickSort(fields, 0, fieldCount-1); + + for(int i=0;i= hi) + return; + else if (hi == 1+lo) { + if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { + final DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[hi]; + array[hi] = tmp; + } + return; + } + + int mid = (lo + hi) >>> 1; + + if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp; + } + + if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp = array[mid]; + array[mid] = array[hi]; + array[hi] = tmp; + + if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp2 = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp2; + } + } + + int left = lo + 1; + int right = hi - 1; + + if (left >= right) + return; + + DocFieldProcessorPerField partition = array[mid]; + + for (; ;) { + while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0) + --right; + + while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0) + ++left; + + if (left < right) { + DocFieldProcessorPerField tmp = array[left]; + array[left] = array[right]; + array[right] = tmp; + --right; + } else { + break; + } + } + + quickSort(array, lo, left); + quickSort(array, left + 1, hi); + } } Index: lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -19,55 +19,35 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; import java.util.List; import java.util.Map; import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.codecs.PostingsConsumer; -import org.apache.lucene.index.codecs.TermStats; -import org.apache.lucene.index.codecs.TermsConsumer; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; final class FreqProxTermsWriter extends TermsHashConsumer { @Override - public TermsHashConsumerPerThread addThread(TermsHashPerThread perThread) { - return new FreqProxTermsWriterPerThread(perThread); - } - - @Override void abort() {} - private int flushedDocCount; - // TODO: would be nice to factor out more of this, eg the // FreqProxFieldMergeState, and code to visit all Fields // under the same FieldInfo together, up into TermsHash*. // Other writers would presumably share alot of this... 
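The per-field flush added further below reads each term's buffered postings back out of the byte slices and decodes doc deltas whose low bit signals a frequency of exactly one (docID += code >>> 1; termFreq is 1 when the bit is set, otherwise a following VInt). The round-trip sketch below uses plain Java streams in place of the byte-slice reader and is only meant to illustrate that bit layout, not the actual in-memory slice format.

    // Sketch of the delta + low-bit-frequency encoding that the flush loop decodes.
    import java.io.*;

    public class FreqEncodingSketch {

      static void writeVInt(DataOutput out, int i) throws IOException {
        while ((i & ~0x7F) != 0) {
          out.writeByte((byte) ((i & 0x7F) | 0x80));
          i >>>= 7;
        }
        out.writeByte((byte) i);
      }

      static int readVInt(DataInput in) throws IOException {
        byte b = in.readByte();
        int i = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7) {
          b = in.readByte();
          i |= (b & 0x7F) << shift;
        }
        return i;
      }

      public static void main(String[] args) throws IOException {
        int[] docs  = {3, 7, 42};
        int[] freqs = {1, 5, 2};

        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        int lastDoc = 0;
        for (int i = 0; i < docs.length; i++) {
          int delta = docs[i] - lastDoc;
          lastDoc = docs[i];
          if (freqs[i] == 1) {
            writeVInt(out, (delta << 1) | 1);   // low bit set: freq is exactly 1
          } else {
            writeVInt(out, delta << 1);         // low bit clear: freq follows as VInt
            writeVInt(out, freqs[i]);
          }
        }

        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
        int docID = 0;
        for (int i = 0; i < docs.length; i++) {
          int code = readVInt(in);
          docID += code >>> 1;                  // same decode as the flush loop
          int termFreq = ((code & 1) != 0) ? 1 : readVInt(in);
          System.out.println("doc=" + docID + " freq=" + termFreq);
        }
      }
    }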
@Override - public void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { + public void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { // Gather all FieldData's that have postings, across all // ThreadStates List allFields = new ArrayList(); - - flushedDocCount = state.numDocs; - for (Map.Entry> entry : threadsAndFields.entrySet()) { - - Collection fields = entry.getValue(); - - - for (final TermsHashConsumerPerField i : fields) { - final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) i; - if (perField.termsHashPerField.bytesHash.size() > 0) + for (TermsHashConsumerPerField f : fieldsToFlush.values()) { + final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f; + if (perField.termsHashPerField.bytesHash.size() > 0) { allFields.add(perField); - } + } } final int numAllFields = allFields.size(); @@ -77,6 +57,8 @@ final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state); + TermsHash termsHash = null; + /* Current writer chain: FieldsConsumer @@ -89,257 +71,48 @@ -> IMPL: FormatPostingsPositionsWriter */ - int start = 0; - while(start < numAllFields) { - final FieldInfo fieldInfo = allFields.get(start).fieldInfo; - final String fieldName = fieldInfo.name; + for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) { + final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo; - int end = start+1; - while(end < numAllFields && allFields.get(end).fieldInfo.name.equals(fieldName)) - end++; - - FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end-start]; - for(int i=start;i> entry : threadsAndFields.entrySet()) { - FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.getKey(); - perThread.termsHashPerThread.reset(true); + if (termsHash != null) { + termsHash.reset(); } consumer.close(); } BytesRef payload; - /* Walk through all unique text tokens (Posting - * instances) found in this field and serialize them - * into a single RAM segment. */ - void appendPostings(String fieldName, SegmentWriteState state, - FreqProxTermsWriterPerField[] fields, - FieldsConsumer consumer) - throws CorruptIndexException, IOException { + @Override + public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { + return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); + } - int numFields = fields.length; + @Override + void finishDocument(TermsHash termsHash) throws IOException { + } - final BytesRef text = new BytesRef(); - - final FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields]; - - final TermsConsumer termsConsumer = consumer.addField(fields[0].fieldInfo); - final Comparator termComp = termsConsumer.getComparator(); - - for(int i=0;i 0; if (omitTermFreqAndPositions) { @@ -169,7 +177,7 @@ } } } - + @Override ParallelPostingsArray createPostingsArray(int size) { return new FreqProxPostingsArray(size); @@ -212,7 +220,180 @@ return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT; } } - + public void abort() {} + + BytesRef payload; + + /* Walk through all unique text tokens (Posting + * instances) found in this field and serialize them + * into a single RAM segment. 
*/ + void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state) + throws CorruptIndexException, IOException { + + final TermsConsumer termsConsumer = consumer.addField(fieldInfo); + final Comparator termComp = termsConsumer.getComparator(); + + final Term protoTerm = new Term(fieldName); + + final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + + final Map segDeletes; + if (state.segDeletes != null && state.segDeletes.terms.size() > 0) { + segDeletes = state.segDeletes.terms; + } else { + segDeletes = null; + } + + final int[] termIDs = termsHashPerField.sortPostings(termComp); + final int numTerms = termsHashPerField.bytesHash.size(); + final BytesRef text = new BytesRef(); + final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray; + final ByteSliceReader freq = new ByteSliceReader(); + final ByteSliceReader prox = new ByteSliceReader(); + + long sumTotalTermFreq = 0; + for (int i = 0; i < numTerms; i++) { + final int termID = termIDs[i]; + // Get BytesRef + final int textStart = postings.textStarts[termID]; + termsHashPerField.bytePool.setBytesRef(text, textStart); + + termsHashPerField.initReader(freq, termID, 0); + if (!fieldInfo.omitTermFreqAndPositions) { + termsHashPerField.initReader(prox, termID, 1); + } + + // TODO: really TermsHashPerField should take over most + // of this loop, including merge sort of terms from + // multiple threads and interacting with the + // TermsConsumer, only calling out to us (passing us the + // DocsConsumer) to handle delivery of docs/positions + + final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text); + + final int delDocLimit; + if (segDeletes != null) { + final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text)); + if (docIDUpto != null) { + delDocLimit = docIDUpto; + } else { + delDocLimit = 0; + } + } else { + delDocLimit = 0; + } + + // Now termStates has numToMerge FieldMergeStates + // which all share the same term. Now we must + // interleave the docID streams. + int numDocs = 0; + long totTF = 0; + int docID = 0; + int termFreq = 0; + + while(true) { + if (freq.eof()) { + if (postings.lastDocCodes[termID] != -1) { + // Return last doc + docID = postings.lastDocIDs[termID]; + if (!omitTermFreqAndPositions) { + termFreq = postings.docFreqs[termID]; + } + postings.lastDocCodes[termID] = -1; + } else { + // EOF + break; + } + } else { + final int code = freq.readVInt(); + if (omitTermFreqAndPositions) { + docID += code; + } else { + docID += code >>> 1; + if ((code & 1) != 0) { + termFreq = 1; + } else { + termFreq = freq.readVInt(); + } + } + + assert docID != postings.lastDocIDs[termID]; + } + + numDocs++; + assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs; + final int termDocFreq = termFreq; + + // NOTE: we could check here if the docID was + // deleted, and skip it. However, this is somewhat + // dangerous because it can yield non-deterministic + // behavior since we may see the docID before we see + // the term that caused it to be deleted. This + // would mean some (but not all) of its postings may + // make it into the index, which'd alter the docFreq + // for those terms. We could fix this by doing two + // passes, ie first sweep marks all del docs, and + // 2nd sweep does the real flush, but I suspect + // that'd add too much time to flush. + postingsConsumer.startDoc(docID, termDocFreq); + if (docID < delDocLimit) { + // Mark it deleted. 
TODO: we could also skip + // writing its postings; this would be + // deterministic (just for this Term's docs). + if (state.deletedDocs == null) { + state.deletedDocs = new BitVector(state.numDocs); + } + state.deletedDocs.set(docID); + } + + // Carefully copy over the prox + payload info, + // changing the format to match Lucene's segment + // format. + if (!currentFieldOmitTermFreqAndPositions) { + // omitTermFreqAndPositions == false so we do write positions & + // payload + int position = 0; + totTF += termDocFreq; + for(int j=0;j> 1; + + final int payloadLength; + final BytesRef thisPayload; + + if ((code & 1) != 0) { + // This position has a payload + payloadLength = prox.readVInt(); + + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[payloadLength]; + } else if (payload.bytes.length < payloadLength) { + payload.grow(payloadLength); + } + + prox.readBytes(payload.bytes, 0, payloadLength); + payload.length = payloadLength; + thisPayload = payload; + + } else { + payloadLength = 0; + thisPayload = null; + } + + postingsConsumer.addPosition(position, thisPayload); + } + + postingsConsumer.finishDoc(); + } + } + termsConsumer.finishTerm(text, new TermStats(numDocs, totTF)); + sumTotalTermFreq += totTF; + } + + termsConsumer.finish(sumTotalTermFreq); + } + } Index: lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (working copy) @@ -17,87 +17,62 @@ * limitations under the License. */ +import java.io.IOException; +import java.util.Map; + import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; -import java.io.IOException; -import java.util.Collection; - -import java.util.Map; - final class TermVectorsTermsWriter extends TermsHashConsumer { - final DocumentsWriter docWriter; - PerDoc[] docFreeList = new PerDoc[1]; + final DocumentsWriterPerThread docWriter; int freeCount; IndexOutput tvx; IndexOutput tvd; IndexOutput tvf; int lastDocID; + + final DocumentsWriterPerThread.DocState docState; + final BytesRef flushTerm = new BytesRef(); + + // Used by perField when serializing the term vectors + final ByteSliceReader vectorSliceReader = new ByteSliceReader(); boolean hasVectors; - public TermVectorsTermsWriter(DocumentsWriter docWriter) { + public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; + docState = docWriter.docState; } @Override - public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) { - return new TermVectorsTermsWriterPerThread(termsHashPerThread, this); - } - - @Override - synchronized void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { if (tvx != null) { // At least one doc in this run had term vectors enabled fill(state.numDocs); + assert state.segmentName != null; + String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION); tvx.close(); tvf.close(); tvd.close(); - tvx = tvd = tvf = null; - assert state.segmentName != null; - String idxName = 
IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION); - if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) { + tvx = null; + if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName)); - } lastDocID = 0; state.hasVectors = hasVectors; hasVectors = false; } - for (Map.Entry> entry : threadsAndFields.entrySet()) { - for (final TermsHashConsumerPerField field : entry.getValue() ) { - TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; - perField.termsHashPerField.reset(); - perField.shrinkHash(); - } - - TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey(); - perThread.termsHashPerThread.reset(true); + for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) { + TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; + perField.termsHashPerField.reset(); + perField.shrinkHash(); } } - int allocCount; - - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else { - return docFreeList[--freeCount]; - } - } - /** Fills in no-term-vectors for all docs we haven't seen * since the last doc that had term vectors. 
*/ void fill(int docID) throws IOException { @@ -112,18 +87,17 @@ } } - synchronized void initTermVectorsWriter() throws IOException { + private final void initTermVectorsWriter() throws IOException { if (tvx == null) { // If we hit an exception while init'ing the term // vector output files, we must abort this segment // because those files will be in an unknown // state: - hasVectors = true; tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION)); tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION)); - + tvx.writeInt(TermVectorsReader.FORMAT_CURRENT); tvd.writeInt(TermVectorsReader.FORMAT_CURRENT); tvf.writeInt(TermVectorsReader.FORMAT_CURRENT); @@ -132,39 +106,44 @@ } } - synchronized void finishDocument(PerDoc perDoc) throws IOException { + @Override + void finishDocument(TermsHash termsHash) throws IOException { assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start"); + if (!hasVectors) { + return; + } + initTermVectorsWriter(); - fill(perDoc.docID); + fill(docState.docID); // Append term vectors to the real outputs: - tvx.writeLong(tvd.getFilePointer()); + long pointer = tvd.getFilePointer(); + tvx.writeLong(pointer); tvx.writeLong(tvf.getFilePointer()); - tvd.writeVInt(perDoc.numVectorFields); - if (perDoc.numVectorFields > 0) { - for(int i=0;i 0) { + for(int i=0;i threadBindings = new HashMap(); - - boolean bufferIsFull; // True when it's time to write segment - private boolean aborting; // True if an abort is pending - PrintStream infoStream; SimilarityProvider similarityProvider; - // max # simultaneous threads; if there are more than - // this, they wait for others to finish first - private final int maxThreadStates; + List newFiles; - // TODO: cutover to BytesRefHash - // Deletes for our still-in-RAM (to be flushed next) segment - private BufferedDeletes pendingDeletes = new BufferedDeletes(false); - - static class DocState { - DocumentsWriter docWriter; - Analyzer analyzer; - PrintStream infoStream; - SimilarityProvider similarityProvider; - int docID; - Document doc; - String maxTermPrefix; + final IndexWriter indexWriter; - // Only called by asserts - public boolean testPoint(String name) { - return docWriter.writer.testPoint(name); - } + private AtomicInteger numDocsInRAM = new AtomicInteger(0); - public void clear() { - // don't hold onto doc nor analyzer, in case it is - // largish: - doc = null; - analyzer = null; - } - } + // TODO: cut over to BytesRefHash in BufferedDeletes + volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue(); + private final Queue ticketQueue = new LinkedList(); - /** Consumer returns this on each doc. This holds any - * state that must be flushed synchronized "in docID - * order". We gather these and flush them in order. */ - abstract static class DocWriter { - DocWriter next; - int docID; - abstract void finish() throws IOException; - abstract void abort(); - abstract long sizeInBytes(); + private Collection abortedFiles; // List of files that were written before last abort() - void setNext(DocWriter next) { - this.next = next; - } - } + final IndexingChain chain; - /** - * Create and return a new DocWriterBuffer. 
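The flush hunk above asserts that the term-vectors index file is exactly 4 + numDocs * 16 bytes: a 4-byte format header written by tvx.writeInt, plus one tvd pointer and one tvf pointer (two longs) appended per document by finishDocument, which is also why fill() must write entries even for documents that have no vectors. A trivial sketch of that arithmetic:

    // Sketch of the tvx size invariant checked in flush(): a 4-byte format
    // header plus two long pointers (into tvd and tvf) per document.
    public class TvxSizeSketch {
      static long expectedTvxLength(int numDocs) {
        return 4L + 16L * numDocs;   // header + (tvd pointer + tvf pointer) per doc
      }

      public static void main(String[] args) {
        System.out.println(expectedTvxLength(1000));   // prints 16004
      }
    }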
- */ - PerDocBuffer newPerDocBuffer() { - return new PerDocBuffer(); - } - - /** - * RAMFile buffer for DocWriters. - */ - class PerDocBuffer extends RAMFile { - - /** - * Allocate bytes used from shared pool. - */ - @Override - protected byte[] newBuffer(int size) { - assert size == PER_DOC_BLOCK_SIZE; - return perDocAllocator.getByteBlock(); + final DocumentsWriterPerThreadPool perThreadPool; + final FlushPolicy flushPolicy; + final DocumentsWriterFlushControl flushControl; + final Healthiness healthiness; + DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers, + BufferedDeletesStream bufferedDeletesStream) throws IOException { + this.directory = directory; + this.indexWriter = writer; + this.similarityProvider = config.getSimilarityProvider(); + this.perThreadPool = config.getIndexerThreadPool(); + this.chain = config.getIndexingChain(); + this.perThreadPool.initialize(this, globalFieldNumbers, config); + final FlushPolicy configuredPolicy = config.getFlushPolicy(); + if (configuredPolicy == null) { + flushPolicy = new FlushByRamOrCountsPolicy(); + } else { + flushPolicy = configuredPolicy; } + flushPolicy.init(this); - /** - * Recycle the bytes used. - */ - synchronized void recycle() { - if (buffers.size() > 0) { - setLength(0); - - // Recycle the blocks - perDocAllocator.recycleByteBlocks(buffers); - buffers.clear(); - sizeInBytes = 0; - - assert numBuffers() == 0; - } - } + healthiness = new Healthiness(); + final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024; + flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT); } - - /** - * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method - * which returns the DocConsumer that the DocumentsWriter calls to process the - * documents. - */ - abstract static class IndexingChain { - abstract DocConsumer getChain(DocumentsWriter documentsWriter); - } - - static final IndexingChain defaultIndexingChain = new IndexingChain() { - @Override - DocConsumer getChain(DocumentsWriter documentsWriter) { - /* - This is the current indexing chain: - - DocConsumer / DocConsumerPerThread - --> code: DocFieldProcessor / DocFieldProcessorPerThread - --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField - --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField - --> code: DocInverter / DocInverterPerThread / DocInverterPerField - --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: TermsHash / TermsHashPerThread / TermsHashPerField - --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField - --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField - --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField - --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField - --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField - */ - - // Build up indexing chain: - - final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter); - final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); - /* - * nesting TermsHash instances here to allow the secondary (TermVectors) share the interned postings - * via a shared ByteBlockPool. See TermsHashPerField for details. 
- */ - final TermsHash termVectorsTermHash = new TermsHash(documentsWriter, false, termVectorsWriter, null); - final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, termVectorsTermHash); - final NormsWriter normsWriter = new NormsWriter(); - final DocInverter docInverter = new DocInverter(termsHash, normsWriter); - return new DocFieldProcessor(documentsWriter, docInverter); + synchronized void deleteQueries(final Query... queries) throws IOException { + deleteQueue.addDelete(queries); + flushControl.doOnDelete(); + if (flushControl.doApplyAllDeletes()) { + applyAllDeletes(deleteQueue); } - }; - - final DocConsumer consumer; - - // How much RAM we can use before flushing. This is 0 if - // we are flushing by doc count instead. - - private final IndexWriterConfig config; - - private boolean closed; - private FieldInfos fieldInfos; - - private final BufferedDeletesStream bufferedDeletesStream; - private final IndexWriter.FlushControl flushControl; - - DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, IndexingChain indexingChain, FieldInfos fieldInfos, - BufferedDeletesStream bufferedDeletesStream) throws IOException { - this.directory = directory; - this.writer = writer; - this.similarityProvider = config.getSimilarityProvider(); - this.maxThreadStates = config.getMaxThreadStates(); - this.fieldInfos = fieldInfos; - this.bufferedDeletesStream = bufferedDeletesStream; - flushControl = writer.flushControl; - consumer = config.getIndexingChain().getChain(this); - this.config = config; } - // Buffer a specific docID for deletion. Currently only - // used when we hit a exception when adding a document - synchronized void deleteDocID(int docIDUpto) { - pendingDeletes.addDocID(docIDUpto); - // NOTE: we do not trigger flush here. This is - // potentially a RAM leak, if you have an app that tries - // to add docs but every single doc always hits a - // non-aborting exception. Allowing a flush here gets - // very messy because we are only invoked when handling - // exceptions so to do this properly, while handling an - // exception we'd have to go off and flush new deletes - // which is risky (likely would hit some other - // confounding exception). - } - - boolean deleteQueries(Query... queries) { - final boolean doFlush = flushControl.waitUpdate(0, queries.length); - synchronized(this) { - for (Query query : queries) { - pendingDeletes.addQuery(query, numDocs); - } + // TODO: we could check w/ FreqProxTermsWriter: if the + // term doesn't exist, don't bother buffering into the + // per-DWPT map (but still must go into the global map) + synchronized void deleteTerms(final Term... terms) throws IOException { + final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue; + deleteQueue.addDelete(terms); + flushControl.doOnDelete(); + if (flushControl.doApplyAllDeletes()) { + applyAllDeletes(deleteQueue); } - return doFlush; } - - boolean deleteQuery(Query query) { - final boolean doFlush = flushControl.waitUpdate(0, 1); - synchronized(this) { - pendingDeletes.addQuery(query, numDocs); - } - return doFlush; + + DocumentsWriterDeleteQueue currentDeleteSession() { + return deleteQueue; } - boolean deleteTerms(Term... 
terms) { - final boolean doFlush = flushControl.waitUpdate(0, terms.length); - synchronized(this) { - for (Term term : terms) { - pendingDeletes.addTerm(term, numDocs); + private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException { + if (deleteQueue != null) { + synchronized (ticketQueue) { + // Freeze and insert the delete flush ticket in the queue + ticketQueue.add(new FlushTicket(deleteQueue.freezeGlobalBuffer(null), false)); + applyFlushTickets(); } } - return doFlush; + indexWriter.applyAllDeletes(); + indexWriter.flushCount.incrementAndGet(); } - // TODO: we could check w/ FreqProxTermsWriter: if the - // term doesn't exist, don't bother buffering into the - // per-DWPT map (but still must go into the global map) - boolean deleteTerm(Term term, boolean skipWait) { - final boolean doFlush = flushControl.waitUpdate(0, 1, skipWait); - synchronized(this) { - pendingDeletes.addTerm(term, numDocs); - } - return doFlush; - } - - /** If non-null, various details of indexing are printed - * here. */ synchronized void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; - for(int i=0;i it = perThreadPool.getAllPerThreadsIterator(); + while (it.hasNext()) { + it.next().perThread.docState.infoStream = infoStream; } } - /** Get current segment name we are writing. */ - synchronized String getSegment() { - return segment; + /** Returns how many docs are currently buffered in RAM. */ + int getNumDocs() { + return numDocsInRAM.get(); } - /** Returns how many docs are currently buffered in RAM. */ - synchronized int getNumDocs() { - return numDocs; + Collection abortedFiles() { + return abortedFiles; } - void message(String message) { + // returns boolean for asserts + boolean message(String message) { if (infoStream != null) { - writer.message("DW: " + message); + indexWriter.message("DW: " + message); } + return true; } - synchronized void setAborting() { - if (infoStream != null) { - message("setAborting"); + private void ensureOpen() throws AlreadyClosedException { + if (closed) { + throw new AlreadyClosedException("this IndexWriter is closed"); } - aborting = true; } /** Called if we hit an exception at a bad time (when @@ -378,816 +220,335 @@ * currently buffered docs. This resets our state, * discarding any docs added since last flush. 
*/ synchronized void abort() throws IOException { - if (infoStream != null) { - message("docWriter: abort"); - } - boolean success = false; - try { + synchronized (this) { + deleteQueue.clear(); + } - // Forcefully remove waiting ThreadStates from line - waitQueue.abort(); - - // Wait for all other threads to finish with - // DocumentsWriter: - waitIdle(); - + try { if (infoStream != null) { - message("docWriter: abort waitIdle done"); + message("docWriter: abort"); } - assert 0 == waitQueue.numWaiting: "waitQueue.numWaiting=" + waitQueue.numWaiting; + final Iterator threadsIterator = perThreadPool.getActivePerThreadsIterator(); - waitQueue.waitingBytes = 0; - - pendingDeletes.clear(); - - for (DocumentsWriterThreadState threadState : threadStates) + while (threadsIterator.hasNext()) { + ThreadState perThread = threadsIterator.next(); + perThread.lock(); try { - threadState.consumer.abort(); - } catch (Throwable t) { + if (perThread.isActive()) { // we might be closed + perThread.perThread.abort(); + perThread.perThread.checkAndResetHasAborted(); + } else { + assert closed; + } + } finally { + perThread.unlock(); } - - try { - consumer.abort(); - } catch (Throwable t) { } - // Reset all postings data - doAfterFlush(); success = true; } finally { - aborting = false; - notifyAll(); if (infoStream != null) { - message("docWriter: done abort; success=" + success); + message("docWriter: done abort; abortedFiles=" + abortedFiles + " success=" + success); } } } - /** Reset after a flush */ - private void doAfterFlush() throws IOException { - // All ThreadStates should be idle when we are called - assert allThreadsIdle(); - for (DocumentsWriterThreadState threadState : threadStates) { - threadState.consumer.doAfterFlush(); - } - - threadBindings.clear(); - waitQueue.reset(); - segment = null; - fieldInfos = new FieldInfos(fieldInfos); - numDocs = 0; - nextDocID = 0; - bufferIsFull = false; - for(int i=0;i BD - final long delGen = bufferedDeletesStream.getNextGen(); - if (pendingDeletes.any()) { - if (segmentInfos.size() > 0 || newSegment != null) { - final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(pendingDeletes, delGen); - if (infoStream != null) { - message("flush: push buffered deletes startSize=" + pendingDeletes.bytesUsed.get() + " frozenSize=" + packet.bytesUsed); - } - bufferedDeletesStream.push(packet); - if (infoStream != null) { - message("flush: delGen=" + packet.gen); - } - if (newSegment != null) { - newSegment.setBufferedDeletesGen(packet.gen); - } - } else { - if (infoStream != null) { - message("flush: drop buffered deletes: no segments"); - } - // We can safely discard these deletes: since - // there are no segments, the deletions cannot - // affect anything. 
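In the new updateDocument path shown in this hunk, an indexing thread locks a ThreadState from the pool, feeds the document to that state's private DocumentsWriterPerThread, checks the abort flag, and only then consults the flush control, so no DocumentsWriter-wide lock is held while inverting. The skeleton below shows that lock/try/finally shape with hypothetical pool and state types; it is a sketch of the pattern under those assumptions, not the patch's code.

    // Sketch of the per-thread-state locking shape used by updateDocument.
    import java.util.concurrent.locks.ReentrantLock;

    class PerThreadStateSketch extends ReentrantLock {
      boolean hasAborted;

      void updateDocument(String doc) {
        // pretend to invert the document into this state's private buffers
        if (doc == null) {
          hasAborted = true;
          throw new IllegalArgumentException("aborting doc");
        }
      }

      boolean checkAndResetHasAborted() {
        boolean flag = hasAborted;
        hasAborted = false;
        return flag;
      }
    }

    public class StatePoolSketch {
      private final PerThreadStateSketch[] states;

      StatePoolSketch(int n) {
        states = new PerThreadStateSketch[n];
        for (int i = 0; i < n; i++) states[i] = new PerThreadStateSketch();
      }

      // Prefer an uncontended state; otherwise block on the first one.
      PerThreadStateSketch getAndLock() {
        for (PerThreadStateSketch s : states) {
          if (s.tryLock()) return s;
        }
        states[0].lock();
        return states[0];
      }

      boolean update(String doc) {
        PerThreadStateSketch state = getAndLock();
        try {
          state.updateDocument(doc);
          return true;
        } finally {
          if (state.checkAndResetHasAborted()) {
            // in the real code: flushControl.doOnAbort(perThread)
          }
          state.unlock();
        }
      }
    }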
- } - pendingDeletes.clear(); - } else if (newSegment != null) { - newSegment.setBufferedDeletesGen(delGen); - } + void close() { + closed = true; + flushControl.setClosed(); } - public boolean anyDeletions() { - return pendingDeletes.any(); - } + boolean updateDocument(final Document doc, final Analyzer analyzer, + final Term delTerm) throws CorruptIndexException, IOException { + ensureOpen(); + boolean maybeMerge = false; + final boolean isUpdate = delTerm != null; + if (healthiness.anyStalledThreads()) { - /** Flush all pending docs to a new segment */ - // Lock order: IW -> DW - synchronized SegmentInfo flush(IndexWriter writer, IndexFileDeleter deleter, MergePolicy mergePolicy, SegmentInfos segmentInfos) throws IOException { - - final long startTime = System.currentTimeMillis(); - - // We change writer's segmentInfos: - assert Thread.holdsLock(writer); - - waitIdle(); - - if (numDocs == 0) { - // nothing to do! + // Help out flushing any pending DWPTs so we can un-stall: if (infoStream != null) { - message("flush: no docs; skipping"); + message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)"); } - // Lock order: IW -> DW -> BD - pushDeletes(null, segmentInfos); - return null; - } - if (aborting) { - if (infoStream != null) { - message("flush: skip because aborting is set"); - } - return null; - } - - boolean success = false; - - SegmentInfo newSegment; - - try { - assert nextDocID == numDocs; - assert waitQueue.numWaiting == 0; - assert waitQueue.waitingBytes == 0; - - if (infoStream != null) { - message("flush postings as segment " + segment + " numDocs=" + numDocs); - } - - final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos, - numDocs, writer.getConfig().getTermIndexInterval(), - fieldInfos.buildSegmentCodecs(true), - pendingDeletes); - // Apply delete-by-docID now (delete-byDocID only - // happens when an exception is hit processing that - // doc, eg if analyzer has some problem w/ the text): - if (pendingDeletes.docIDs.size() > 0) { - flushState.deletedDocs = new BitVector(numDocs); - for(int delDocID : pendingDeletes.docIDs) { - flushState.deletedDocs.set(delDocID); + // Try pick up pending threads here if possible + DocumentsWriterPerThread flushingDWPT; + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + // Don't push the delete here since the update could fail! + maybeMerge = doFlush(flushingDWPT); + if (!healthiness.anyStalledThreads()) { + break; } - pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID); - pendingDeletes.docIDs.clear(); } - newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos); - - Collection threads = new HashSet(); - for (DocumentsWriterThreadState threadState : threadStates) { - threads.add(threadState.consumer); + if (infoStream != null && healthiness.anyStalledThreads()) { + message("WARNING DocumentsWriter still has stalled threads; waiting"); } - double startMBUsed = bytesUsed()/1024./1024.; + healthiness.waitIfStalled(); // block if stalled - consumer.flush(threads, flushState); - - newSegment.setHasVectors(flushState.hasVectors); - - if (infoStream != null) { - message("new segment has " + (flushState.hasVectors ? 
"vectors" : "no vectors")); - if (flushState.deletedDocs != null) { - message("new segment has " + flushState.deletedDocs.count() + " deleted docs"); - } - message("flushedFiles=" + newSegment.files()); - message("flushed codecs=" + newSegment.getSegmentCodecs()); + if (infoStream != null && healthiness.anyStalledThreads()) { + message("WARNING DocumentsWriter done waiting"); } + } - if (mergePolicy.useCompoundFile(segmentInfos, newSegment)) { - final String cfsFileName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), + this, doc); + final DocumentsWriterPerThread flushingDWPT; + + try { - if (infoStream != null) { - message("flush: create compound file \"" + cfsFileName + "\""); - } - - CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, cfsFileName); - for(String fileName : newSegment.files()) { - cfsWriter.addFile(fileName); - } - cfsWriter.close(); - deleter.deleteNewFiles(newSegment.files()); - newSegment.setUseCompoundFile(true); + if (!perThread.isActive()) { + ensureOpen(); + assert false: "perThread is not active but we are still open"; } - - // Must write deleted docs after the CFS so we don't - // slurp the del file into CFS: - if (flushState.deletedDocs != null) { - final int delCount = flushState.deletedDocs.count(); - assert delCount > 0; - newSegment.setDelCount(delCount); - newSegment.advanceDelGen(); - final String delFileName = newSegment.getDelFileName(); - if (infoStream != null) { - message("flush: write " + delCount + " deletes to " + delFileName); + + final DocumentsWriterPerThread dwpt = perThread.perThread; + try { + dwpt.updateDocument(doc, analyzer, delTerm); + numDocsInRAM.incrementAndGet(); + } finally { + if (dwpt.checkAndResetHasAborted()) { + flushControl.doOnAbort(perThread); } - boolean success2 = false; - try { - // TODO: in the NRT case it'd be better to hand - // this del vector over to the - // shortly-to-be-opened SegmentReader and let it - // carry the changes; there's no reason to use - // filesystem as intermediary here. 
- flushState.deletedDocs.write(directory, delFileName); - success2 = true; - } finally { - if (!success2) { - try { - directory.deleteFile(delFileName); - } catch (Throwable t) { - // suppress this so we keep throwing the - // original exception - } - } - } } - - if (infoStream != null) { - message("flush: segment=" + newSegment); - final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.; - final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.; - message(" ramUsed=" + nf.format(startMBUsed) + " MB" + - " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + - " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" + - " docs/MB=" + nf.format(numDocs / newSegmentSize) + - " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%"); - } - - success = true; + flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); } finally { - notifyAll(); - if (!success) { - if (segment != null) { - deleter.refresh(segment); - } - abort(); - } + perThread.unlock(); } - - doAfterFlush(); - - // Lock order: IW -> DW -> BD - pushDeletes(newSegment, segmentInfos); - if (infoStream != null) { - message("flush time " + (System.currentTimeMillis()-startTime) + " msec"); - } - - return newSegment; - } - - synchronized void close() { - closed = true; - notifyAll(); - } - - /** Returns a free (idle) ThreadState that may be used for - * indexing this one document. This call also pauses if a - * flush is pending. If delTerm is non-null then we - * buffer this deleted term after the thread state has - * been acquired. */ - synchronized DocumentsWriterThreadState getThreadState(Document doc, Term delTerm) throws IOException { - - final Thread currentThread = Thread.currentThread(); - assert !Thread.holdsLock(writer); - - // First, find a thread state. If this thread already - // has affinity to a specific ThreadState, use that one - // again. - DocumentsWriterThreadState state = threadBindings.get(currentThread); - if (state == null) { - - // First time this thread has called us since last - // flush. 
Find the least loaded thread state: - DocumentsWriterThreadState minThreadState = null; - for(int i=0;i= maxThreadStates)) { - state = minThreadState; - state.numThreads++; - } else { - // Just create a new "private" thread state - DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1+threadStates.length]; - if (threadStates.length > 0) { - System.arraycopy(threadStates, 0, newArray, 0, threadStates.length); - } - state = newArray[threadStates.length] = new DocumentsWriterThreadState(this); - threadStates = newArray; - } - threadBindings.put(currentThread, state); } - - // Next, wait until my thread state is idle (in case - // it's shared with other threads), and no flush/abort - // pending - waitReady(state); - - // Allocate segment name if this is the first doc since - // last flush: - if (segment == null) { - segment = writer.newSegmentName(); - assert numDocs == 0; - } - - state.docState.docID = nextDocID++; - - if (delTerm != null) { - pendingDeletes.addTerm(delTerm, state.docState.docID); - } - - numDocs++; - state.isIdle = false; - return state; + return maybeMerge; } - - boolean addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { - return updateDocument(doc, analyzer, null); - } - - boolean updateDocument(Document doc, Analyzer analyzer, Term delTerm) - throws CorruptIndexException, IOException { - // Possibly trigger a flush, or wait until any running flush completes: - boolean doFlush = flushControl.waitUpdate(1, delTerm != null ? 1 : 0); - - // This call is synchronized but fast - final DocumentsWriterThreadState state = getThreadState(doc, delTerm); - - final DocState docState = state.docState; - docState.doc = doc; - docState.analyzer = analyzer; - - boolean success = false; - try { - // This call is not synchronized and does all the - // work - final DocWriter perDoc; + private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException { + boolean maybeMerge = false; + while (flushingDWPT != null) { + maybeMerge = true; + boolean success = false; + FlushTicket ticket = null; + try { - perDoc = state.consumer.processDocument(fieldInfos); - } finally { - docState.clear(); - } - - // This call is synchronized but fast - finishDocument(state, perDoc); - - success = true; - } finally { - if (!success) { - - // If this thread state had decided to flush, we - // must clear it so another thread can flush - if (doFlush) { - flushControl.clearFlushPending(); - } - - if (infoStream != null) { - message("exception in updateDocument aborting=" + aborting); - } - - synchronized(this) { - - state.isIdle = true; - notifyAll(); - - if (aborting) { - abort(); - } else { - skipDocWriter.docID = docState.docID; - boolean success2 = false; - try { - waitQueue.add(skipDocWriter); - success2 = true; - } finally { - if (!success2) { - abort(); - return false; - } + assert currentFullFlushDelQueue == null + || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: " + + currentFullFlushDelQueue + "but was: " + flushingDWPT.deleteQueue + + " " + flushControl.isFullFlush(); + /* + * Since with DWPT the flush process is concurrent and several DWPT + * could flush at the same time we must maintain the order of the + * flushes before we can apply the flushed segment and the frozen global + * deletes it is buffering. The reason for this is that the global + * deletes mark a certain point in time where we took a DWPT out of + * rotation and freeze the global deletes. 
+ * + * Example: A flush 'A' starts and freezes the global deletes, then + * flush 'B' starts and freezes all deletes occurred since 'A' has + * started. if 'B' finishes before 'A' we need to wait until 'A' is done + * otherwise the deletes frozen by 'B' are not applied to 'A' and we + * might miss to deletes documents in 'A'. + */ + try { + synchronized (ticketQueue) { + // Each flush is assigned a ticket in the order they accquire the ticketQueue lock + ticket = new FlushTicket(flushingDWPT.prepareFlush(), true); + ticketQueue.add(ticket); + } + + // flush concurrently without locking + final FlushedSegment newSegment = flushingDWPT.flush(); + synchronized (ticketQueue) { + ticket.segment = newSegment; + } + // flush was successful once we reached this point - new seg. has been assigned to the ticket! + success = true; + } finally { + if (!success && ticket != null) { + synchronized (ticketQueue) { + // In the case of a failure make sure we are making progress and + // apply all the deletes since the segment flush failed since the flush + // ticket could hold global deletes see FlushTicket#canPublish() + ticket.isSegmentFlush = false; } - - // Immediately mark this document as deleted - // since likely it was partially added. This - // keeps indexing as "all or none" (atomic) when - // adding a document: - deleteDocID(state.docState.docID); } } + /* + * Now we are done and try to flush the ticket queue if the head of the + * queue has already finished the flush. + */ + applyFlushTickets(); + } finally { + flushControl.doAfterFlush(flushingDWPT); + flushingDWPT.checkAndResetHasAborted(); + indexWriter.flushCount.incrementAndGet(); } + + flushingDWPT = flushControl.nextPendingFlush(); } - - doFlush |= flushControl.flushByRAMUsage("new document"); - - return doFlush; + return maybeMerge; } - public synchronized void waitIdle() { - while (!allThreadsIdle()) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + private void applyFlushTickets() throws IOException { + synchronized (ticketQueue) { + while (true) { + // Keep publishing eligible flushed segments: + final FlushTicket head = ticketQueue.peek(); + if (head != null && head.canPublish()) { + ticketQueue.poll(); + finishFlush(head.segment, head.frozenDeletes); + } else { + break; + } } } } - synchronized void waitReady(DocumentsWriterThreadState state) { - while (!closed && (!state.isIdle || aborting)) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + private void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes) + throws IOException { + // Finish the flushed segment and publish it to IndexWriter + if (newSegment == null) { + assert bufferedDeletes != null; + if (bufferedDeletes != null && bufferedDeletes.any()) { + indexWriter.bufferedDeletesStream.push(bufferedDeletes); + if (infoStream != null) { + message("flush: push buffered deletes: " + bufferedDeletes); + } } + } else { + publishFlushedSegment(newSegment, bufferedDeletes); } - - if (closed) { - throw new AlreadyClosedException("this IndexWriter is closed"); - } } - /** Does the synchronized work to finish/flush the - * inverted document. 
*/ - private void finishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter) throws IOException { - - // Must call this w/o holding synchronized(this) else - // we'll hit deadlock: - balanceRAM(); - - synchronized(this) { - - assert docWriter == null || docWriter.docID == perThread.docState.docID; - - if (aborting) { - - // We are currently aborting, and another thread is - // waiting for me to become idle. We just forcefully - // idle this threadState; it will be fully reset by - // abort() - if (docWriter != null) { - try { - docWriter.abort(); - } catch (Throwable t) { - } - } - - perThread.isIdle = true; - - // wakes up any threads waiting on the wait queue - notifyAll(); - - return; - } - - final boolean doPause; - - if (docWriter != null) { - doPause = waitQueue.add(docWriter); - } else { - skipDocWriter.docID = perThread.docState.docID; - doPause = waitQueue.add(skipDocWriter); - } - - if (doPause) { - waitForWaitQueue(); - } - - perThread.isIdle = true; - - // wakes up any threads waiting on the wait queue - notifyAll(); + final void subtractFlushedNumDocs(int numFlushed) { + int oldValue = numDocsInRAM.get(); + while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) { + oldValue = numDocsInRAM.get(); } } - - synchronized void waitForWaitQueue() { - do { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + + /** + * Publishes the flushed segment, segment private deletes (if any) and its + * associated global delete (if present) to IndexWriter. The actual + * publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s + * delete generation is always GlobalPacket_deleteGeneration + 1 + */ + private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket) + throws IOException { + assert newSegment != null; + final SegmentInfo segInfo = indexWriter.prepareFlushedSegment(newSegment); + final BufferedDeletes deletes = newSegment.segmentDeletes; + FrozenBufferedDeletes packet = null; + if (deletes != null && deletes.any()) { + // Segment private delete + packet = new FrozenBufferedDeletes(deletes, true); + if (infoStream != null) { + message("flush: push buffered seg private deletes: " + packet); } - } while (!waitQueue.doResume()); - } - - private static class SkipDocWriter extends DocWriter { - @Override - void finish() { } - @Override - void abort() { - } - @Override - long sizeInBytes() { - return 0; - } - } - final SkipDocWriter skipDocWriter = new SkipDocWriter(); - NumberFormat nf = NumberFormat.getInstance(); - - /* Initial chunks size of the shared byte[] blocks used to - store postings data */ - final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; - - /* if you increase this, you must fix field cache impl for - * getTerms/getTermsIndex requires <= 32768. */ - final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; - - /* Initial chunks size of the shared int[] blocks used to - store postings data */ - final static int INT_BLOCK_SHIFT = 13; - final static int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT; - final static int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1; - - private List freeIntBlocks = new ArrayList(); - - /* Allocate another int[] from the shared pool */ - synchronized int[] getIntBlock() { - final int size = freeIntBlocks.size(); - final int[] b; - if (0 == size) { - b = new int[INT_BLOCK_SIZE]; - bytesUsed.addAndGet(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT); - } else { - b = freeIntBlocks.remove(size-1); - } - return b; + // now publish! 
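subtractFlushedNumDocs above decrements the shared numDocsInRAM counter with a compareAndSet retry loop so that concurrent DWPT flushes never lose an update; a standalone version of the same pattern is sketched below (AtomicInteger.addAndGet(-numFlushed) would be an equivalent one-liner).

    // Sketch of the CAS retry loop used by subtractFlushedNumDocs.
    import java.util.concurrent.atomic.AtomicInteger;

    public class CasDecrementSketch {
      private final AtomicInteger numDocsInRAM = new AtomicInteger(100);

      void subtractFlushedNumDocs(int numFlushed) {
        int oldValue = numDocsInRAM.get();
        // Retry until no other thread changed the counter between get() and CAS.
        while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) {
          oldValue = numDocsInRAM.get();
        }
      }

      public static void main(String[] args) {
        CasDecrementSketch s = new CasDecrementSketch();
        s.subtractFlushedNumDocs(40);
        System.out.println(s.numDocsInRAM.get());  // prints 60
      }
    }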
+ indexWriter.publishFlushedSegment(segInfo, packet, globalPacket); } - - long bytesUsed() { - return bytesUsed.get() + pendingDeletes.bytesUsed.get(); + + // for asserts + private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null; + // for asserts + private synchronized boolean setFlushingDeleteQueue(DocumentsWriterDeleteQueue session) { + currentFullFlushDelQueue = session; + return true; } + + /* + * FlushAllThreads is synced by IW fullFlushLock. Flushing all threads is a + * two stage operation; the caller must ensure (in try/finally) that finishFlush + * is called after this method, to release the flush lock in DWFlushControl + */ + final boolean flushAllThreads(final boolean flushDeletes) + throws IOException { + final DocumentsWriterDeleteQueue flushingDeleteQueue; - /* Return int[]s to the pool */ - synchronized void recycleIntBlocks(int[][] blocks, int start, int end) { - for(int i=start;i= ramBufferSize; - } - - if (doBalance) { - - if (infoStream != null) { - message(" RAM: balance allocations: usedMB=" + toMB(bytesUsed()) + - " vs trigger=" + toMB(ramBufferSize) + - " deletesMB=" + toMB(deletesRAMUsed) + - " byteBlockFree=" + toMB(byteBlockAllocator.bytesUsed()) + - " perDocFree=" + toMB(perDocAllocator.bytesUsed())); + boolean anythingFlushed = false; + try { + DocumentsWriterPerThread flushingDWPT; + // Help out with flushing: + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + anythingFlushed |= doFlush(flushingDWPT); } - - final long startBytesUsed = bytesUsed() + deletesRAMUsed; - - int iter = 0; - - // We free equally from each pool in 32 KB - // chunks until we are below our threshold - // (freeLevel) - - boolean any = true; - - final long freeLevel = (long) (0.95 * ramBufferSize); - - while(bytesUsed()+deletesRAMUsed > freeLevel) { - - synchronized(this) { - if (0 == perDocAllocator.numBufferedBlocks() && - 0 == byteBlockAllocator.numBufferedBlocks() && - 0 == freeIntBlocks.size() && !any) { - // Nothing else to free -- must flush now. - bufferIsFull = bytesUsed()+deletesRAMUsed > ramBufferSize; - if (infoStream != null) { - if (bytesUsed()+deletesRAMUsed > ramBufferSize) { - message(" nothing to free; set bufferIsFull"); - } else { - message(" nothing to free"); - } - } - break; - } - - if ((0 == iter % 4) && byteBlockAllocator.numBufferedBlocks() > 0) { - byteBlockAllocator.freeBlocks(1); - } - if ((1 == iter % 4) && freeIntBlocks.size() > 0) { - freeIntBlocks.remove(freeIntBlocks.size()-1); - bytesUsed.addAndGet(-INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT); - } - if ((2 == iter % 4) && perDocAllocator.numBufferedBlocks() > 0) { - perDocAllocator.freeBlocks(32); // Remove upwards of 32 blocks (each block is 1K) - } + // If a concurrent flush is still in flight wait for it + while (flushControl.anyFlushing()) { + flushControl.waitForFlush(); + } + if (!anythingFlushed && flushDeletes) { + synchronized (ticketQueue) { + ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false)); } - - if ((3 == iter % 4) && any) { - // Ask consumer to free any recycled state - any = consumer.freeRAM(); - } - - iter++; + applyFlushTickets(); } - - if (infoStream != null) { - message(" after free: freedMB=" + nf.format((startBytesUsed-bytesUsed()-deletesRAMUsed)/1024./1024.) 
+ " usedMB=" + nf.format((bytesUsed()+deletesRAMUsed)/1024./1024.)); - } + } finally { + assert flushingDeleteQueue == currentFullFlushDelQueue; } + return anythingFlushed; } - - final WaitQueue waitQueue = new WaitQueue(); - - private class WaitQueue { - DocWriter[] waiting; - int nextWriteDocID; - int nextWriteLoc; - int numWaiting; - long waitingBytes; - - public WaitQueue() { - waiting = new DocWriter[10]; + + final void finishFullFlush(boolean success) { + assert setFlushingDeleteQueue(null); + if (success) { + // Release the flush lock + flushControl.finishFullFlush(); + } else { + flushControl.abortFullFlushes(); } + } - synchronized void reset() { - // NOTE: nextWriteLoc doesn't need to be reset - assert numWaiting == 0; - assert waitingBytes == 0; - nextWriteDocID = 0; + static final class FlushTicket { + final FrozenBufferedDeletes frozenDeletes; + /* access to non-final members must be synchronized on DW#ticketQueue */ + FlushedSegment segment; + boolean isSegmentFlush; + + FlushTicket(FrozenBufferedDeletes frozenDeletes, boolean isSegmentFlush) { + this.frozenDeletes = frozenDeletes; + this.isSegmentFlush = isSegmentFlush; } - - synchronized boolean doResume() { - final double mb = config.getRAMBufferSizeMB(); - final long waitQueueResumeBytes; - if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) { - waitQueueResumeBytes = 2*1024*1024; - } else { - waitQueueResumeBytes = (long) (mb*1024*1024*0.05); - } - return waitingBytes <= waitQueueResumeBytes; + + boolean canPublish() { + return (!isSegmentFlush || segment != null); } - - synchronized boolean doPause() { - final double mb = config.getRAMBufferSizeMB(); - final long waitQueuePauseBytes; - if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) { - waitQueuePauseBytes = 4*1024*1024; - } else { - waitQueuePauseBytes = (long) (mb*1024*1024*0.1); - } - return waitingBytes > waitQueuePauseBytes; - } - - synchronized void abort() { - int count = 0; - for(int i=0;i= nextWriteDocID; - - if (doc.docID == nextWriteDocID) { - writeDocument(doc); - while(true) { - doc = waiting[nextWriteLoc]; - if (doc != null) { - numWaiting--; - waiting[nextWriteLoc] = null; - waitingBytes -= doc.sizeInBytes(); - writeDocument(doc); - } else { - break; - } - } - } else { - - // I finished before documents that were added - // before me. This can easily happen when I am a - // small doc and the docs before me were large, or, - // just due to luck in the thread scheduling. Just - // add myself to the queue and when that large doc - // finishes, it will flush me: - int gap = doc.docID - nextWriteDocID; - if (gap >= waiting.length) { - // Grow queue - DocWriter[] newArray = new DocWriter[ArrayUtil.oversize(gap, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - assert nextWriteLoc >= 0; - System.arraycopy(waiting, nextWriteLoc, newArray, 0, waiting.length-nextWriteLoc); - System.arraycopy(waiting, 0, newArray, waiting.length-nextWriteLoc, nextWriteLoc); - nextWriteLoc = 0; - waiting = newArray; - gap = doc.docID - nextWriteDocID; - } - - int loc = nextWriteLoc + gap; - if (loc >= waiting.length) { - loc -= waiting.length; - } - - // We should only wrap one time - assert loc < waiting.length; - - // Nobody should be in my spot! 
- assert waiting[loc] == null; - waiting[loc] = doc; - numWaiting++; - waitingBytes += doc.sizeInBytes(); - } - - return doPause(); - } } } Index: lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (working copy) @@ -28,11 +28,10 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField { - final TermVectorsTermsWriterPerThread perThread; final TermsHashPerField termsHashPerField; final TermVectorsTermsWriter termsWriter; final FieldInfo fieldInfo; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; boolean doVectors; @@ -41,11 +40,10 @@ int maxNumPostings; OffsetAttribute offsetAttribute = null; - - public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo) { + + public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriter termsWriter, FieldInfo fieldInfo) { this.termsHashPerField = termsHashPerField; - this.perThread = perThread; - this.termsWriter = perThread.termsWriter; + this.termsWriter = termsWriter; this.fieldInfo = fieldInfo; docState = termsHashPerField.docState; fieldState = termsHashPerField.fieldState; @@ -72,65 +70,55 @@ } if (doVectors) { - if (perThread.doc == null) { - perThread.doc = termsWriter.getPerDoc(); - perThread.doc.docID = docState.docID; - assert perThread.doc.numVectorFields == 0; - assert 0 == perThread.doc.perDocTvf.length(); - assert 0 == perThread.doc.perDocTvf.getFilePointer(); + termsWriter.hasVectors = true; + if (termsHashPerField.bytesHash.size() != 0) { + // Only necessary if previous doc hit a + // non-aborting exception while writing vectors in + // this field: + termsHashPerField.reset(); + } } - assert perThread.doc.docID == docState.docID; - - if (termsHashPerField.bytesHash.size() != 0) { - // Only necessary if previous doc hit a - // non-aborting exception while writing vectors in - // this field: - termsHashPerField.reset(); - perThread.termsHashPerThread.reset(false); - } - } - // TODO: only if needed for performance //perThread.postingsCount = 0; return doVectors; - } + } public void abort() {} /** Called once per field per document if term vectors * are enabled, to write the vectors to * RAMOutputStream, which is then quickly flushed to - * the real term vectors files in the Directory. */ - @Override + * the real term vectors files in the Directory. */ @Override void finish() throws IOException { + if (!doVectors || termsHashPerField.bytesHash.size() == 0) + return; + termsWriter.addFieldToFlush(this); + } + + void finishDocument() throws IOException { assert docState.testPoint("TermVectorsTermsWriterPerField.finish start"); final int numPostings = termsHashPerField.bytesHash.size(); - final BytesRef flushTerm = perThread.flushTerm; + final BytesRef flushTerm = termsWriter.flushTerm; assert numPostings >= 0; - if (!doVectors || numPostings == 0) - return; - if (numPostings > maxNumPostings) maxNumPostings = numPostings; - final IndexOutput tvf = perThread.doc.perDocTvf; - // This is called once, after inverting all occurrences // of a given field in the doc. At this point we flush // our hash into the DocWriter. 
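The DocumentsWriter hunk above introduces the ticket-queue protocol its comments describe: a ticket is reserved under the queue lock before a concurrent flush starts, and finished segments are only ever published from the head of the queue, so a flush 'B' that completes before an earlier flush 'A' waits until 'A' can publish too. The sketch below is a minimal illustration of that idea only; TicketQueueSketch, Ticket, reserve, complete and publish are illustrative names, not the patch's actual FlushTicket/ticketQueue code.

    import java.util.ArrayDeque;
    import java.util.Deque;

    // Illustrative stand-ins only; the patch uses FlushTicket / FlushedSegment
    // and a ticketQueue field inside DocumentsWriter.
    final class TicketQueueSketch {

      static final class Ticket {
        volatile Object segment;     // set once the concurrent flush has produced a segment
        volatile boolean failed;     // set if the flush aborted, so the queue keeps draining
        boolean canPublish() { return segment != null || failed; }
      }

      private final Deque<Ticket> queue = new ArrayDeque<Ticket>();

      // Taken under the queue lock *before* the flush starts,
      // so ticket order == flush start order.
      synchronized Ticket reserve() {
        Ticket ticket = new Ticket();
        queue.addLast(ticket);
        return ticket;
      }

      // Called when a flush finishes, possibly out of order ('B' may finish before 'A').
      void complete(Ticket ticket, Object segment) {
        if (segment == null) {
          ticket.failed = true;
        } else {
          ticket.segment = segment;
        }
        publishInOrder();
      }

      // Only ever publish from the head: a later flush that finished early waits here
      // until every earlier ticket can publish, which keeps deletes ordered.
      private synchronized void publishInOrder() {
        while (!queue.isEmpty() && queue.peekFirst().canPublish()) {
          Ticket head = queue.pollFirst();
          if (!head.failed) {
            publish(head.segment);
          }
        }
      }

      private void publish(Object segment) {
        // stands in for handing the segment (plus frozen deletes) to IndexWriter
        System.out.println("publishing " + segment);
      }
    }

In the patch the same role is played by ticketQueue together with FlushTicket.canPublish(): because publishing always starts at the head, deletes frozen by a later flush can never be applied ahead of an earlier, still-running flush.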
assert fieldInfo.storeTermVector; - assert perThread.vectorFieldsInOrder(fieldInfo); + assert termsWriter.vectorFieldsInOrder(fieldInfo); - perThread.doc.addField(termsHashPerField.fieldInfo.number); TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray; + final IndexOutput tvf = termsWriter.tvf; // TODO: we may want to make this sort in same order // as Codec's terms dict? @@ -140,21 +128,21 @@ byte bits = 0x0; if (doVectorPositions) bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR; - if (doVectorOffsets) + if (doVectorOffsets) bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR; tvf.writeByte(bits); int lastLen = 0; byte[] lastBytes = null; int lastStart = 0; - - final ByteSliceReader reader = perThread.vectorSliceReader; - final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool; + final ByteSliceReader reader = termsWriter.vectorSliceReader; + final ByteBlockPool termBytePool = termsHashPerField.termBytePool; + for(int j=0;j segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization private int optimizeMaxNumSegments; @@ -247,12 +249,12 @@ private long mergeGen; private boolean stopMerges; - private final AtomicInteger flushCount = new AtomicInteger(); - private final AtomicInteger flushDeletesCount = new AtomicInteger(); + final AtomicInteger flushCount = new AtomicInteger(); + final AtomicInteger flushDeletesCount = new AtomicInteger(); final ReaderPool readerPool = new ReaderPool(); final BufferedDeletesStream bufferedDeletesStream; - + // This is a "write once" variable (like the organic dye // on a DVD-R that may or may not be heated by a laser and // then cooled to permanently record the event): it's @@ -339,31 +341,56 @@ */ IndexReader getReader(boolean applyAllDeletes) throws IOException { ensureOpen(); - + final long tStart = System.currentTimeMillis(); if (infoStream != null) { message("flush at getReader"); } - // Do this up front before flushing so that the readers // obtained during this flush are pooled, the first time // this method is called: poolReaders = true; - - // Prevent segmentInfos from changing while opening the - // reader; in theory we could do similar retry logic, - // just like we do when loading segments_N - IndexReader r; - synchronized(this) { - flush(false, applyAllDeletes); - r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); - if (infoStream != null) { - message("return reader version=" + r.getVersion() + " reader=" + r); + final IndexReader r; + doBeforeFlush(); + final boolean maybeMerge; + /* + * for releasing a NRT reader we must ensure that + * DW doesn't add any segments or deletes until we are + * done with creating the NRT DirectoryReader. + * We release the two stage full flush after we are done opening the + * directory reader! 
+ */ + synchronized (fullFlushLock) { + boolean success = false; + try { + maybeMerge = docWriter.flushAllThreads(applyAllDeletes); + if (!maybeMerge) { + flushCount.incrementAndGet(); + } + success = true; + // Prevent segmentInfos from changing while opening the + // reader; in theory we could do similar retry logic, + // just like we do when loading segments_N + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); + if (infoStream != null) { + message("return reader version=" + r.getVersion() + " reader=" + r); + } + } + } finally { + if (!success && infoStream != null) { + message("hit exception during while NRT reader"); + } + // Done: finish the full flush! + docWriter.finishFullFlush(success); + doAfterFlush(); } } - maybeMerge(); - + if(maybeMerge) { + maybeMerge(); + } if (infoStream != null) { message("getReader took " + (System.currentTimeMillis() - tStart) + " msec"); } @@ -400,10 +427,10 @@ if (r != null) { r.hasChanges = false; } - } + } } } - + // used only by asserts public synchronized boolean infoIsLive(SegmentInfo info) { int idx = segmentInfos.indexOf(info); @@ -419,7 +446,7 @@ } return info; } - + /** * Release the segment reader (i.e. decRef it and close if there * are no more references. @@ -432,7 +459,7 @@ public synchronized boolean release(SegmentReader sr) throws IOException { return release(sr, false); } - + /** * Release the segment reader (i.e. decRef it and close if there * are no more references. @@ -493,7 +520,7 @@ sr.close(); } } - + /** Remove all our references to readers, and commits * any pending changes. */ synchronized void close() throws IOException { @@ -503,7 +530,7 @@ Iterator> iter = readerMap.entrySet().iterator(); while (iter.hasNext()) { - + Map.Entry ent = iter.next(); SegmentReader sr = ent.getValue(); @@ -526,7 +553,7 @@ sr.decRef(); } } - + /** * Commit all segment reader in the pool. * @throws IOException @@ -550,7 +577,7 @@ } } } - + /** * Returns a ref to a clone. NOTE: this clone is not * enrolled in the pool, so you should simply close() @@ -564,7 +591,7 @@ sr.decRef(); } } - + /** * Obtain a SegmentReader from the readerPool. The reader * must be returned by calling {@link #release(SegmentReader)} @@ -580,7 +607,7 @@ /** * Obtain a SegmentReader from the readerPool. The reader * must be returned by calling {@link #release(SegmentReader)} - * + * * @see #release(SegmentReader) * @param info * @param doOpenStores @@ -638,7 +665,7 @@ return sr; } } - + /** * Obtain the number of deleted docs for a pooled reader. 
* If the reader isn't being pooled, the segmentInfo's @@ -658,7 +685,7 @@ } } } - + /** * Used internally to throw an {@link * AlreadyClosedException} if this IndexWriter has been @@ -721,7 +748,7 @@ mergePolicy.setIndexWriter(this); mergeScheduler = conf.getMergeScheduler(); codecs = conf.getCodecProvider(); - + bufferedDeletesStream = new BufferedDeletesStream(messageID); bufferedDeletesStream.setInfoStream(infoStream); poolReaders = conf.getReaderPooling(); @@ -790,8 +817,7 @@ // start with previous field numbers, but new FieldInfos globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory); - docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(), - globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)), bufferedDeletesStream); + docWriter = new DocumentsWriter(config, directory, this, globalFieldNumberMap, bufferedDeletesStream); docWriter.setInfoStream(infoStream); // Default deleter (for backwards compatibility) is @@ -849,7 +875,7 @@ public IndexWriterConfig getConfig() { return config; } - + /** If non-null, this will be the default infoStream used * by a newly instantiated IndexWriter. * @see #setInfoStream @@ -901,7 +927,7 @@ public boolean verbose() { return infoStream != null; } - + /** * Commits all changes to an index and closes all * associated files. Note that this may be a costly @@ -916,7 +942,7 @@ * even though part of it (flushing buffered documents) * may have succeeded, so the write lock will still be * held.
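Earlier in this IndexWriter diff, getReader() is rewritten to take fullFlushLock, flush all per-thread writers, open the DirectoryReader, and only then release the two-stage full flush. The following is a hedged sketch of that try/finally shape only; NrtOpenSketch and its abstract methods stand in for docWriter.flushAllThreads(), new DirectoryReader(...) and docWriter.finishFullFlush(success) in the patch.

    import java.io.IOException;

    // Shape of the NRT open path only; the abstract methods are placeholders.
    abstract class NrtOpenSketch {
      private final Object fullFlushLock = new Object();

      Object openNearRealTimeReader() throws IOException {
        synchronized (fullFlushLock) {    // keep concurrent flushes from publishing while we open
          boolean success = false;
          try {
            flushAllThreads();            // stage one: flush every per-thread writer
            Object reader = openReader(); // the reader sees exactly the state just flushed
            success = true;
            return reader;
          } finally {
            finishFullFlush(success);     // stage two: release (or abort) the full flush
          }
        }
      }

      protected abstract void flushAllThreads() throws IOException;
      protected abstract Object openReader() throws IOException;
      protected abstract void finishFullFlush(boolean success);
    }

The ordering is the point: finishFullFlush(success) runs only after the reader is open, so no concurrent flush can publish segments or deletes that the freshly opened NRT reader would otherwise half-see or miss.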

- * + * *

If you can correct the underlying cause (eg free up * some disk space) then you can call close() again. * Failing that, if you want to force the write lock to be @@ -1036,7 +1062,7 @@ if (infoStream != null) message("now call final commit()"); - + if (!hitOOM) { commitInternal(null); } @@ -1049,7 +1075,7 @@ docWriter = null; deleter.close(); } - + if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -1072,7 +1098,7 @@ } /** Returns the Directory used by this index. */ - public Directory getDirectory() { + public Directory getDirectory() { // Pass false because the flush during closing calls getDirectory ensureOpen(false); return directory; @@ -1196,22 +1222,7 @@ * @throws IOException if there is a low-level IO error */ public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { - ensureOpen(); - boolean doFlush = false; - boolean success = false; - try { - try { - doFlush = docWriter.updateDocument(doc, analyzer, null); - success = true; - } finally { - if (!success && infoStream != null) - message("hit exception adding document"); - } - if (doFlush) - flush(true, false); - } catch (OutOfMemoryError oom) { - handleOOM(oom, "addDocument"); - } + updateDocument(null, doc, analyzer); } /** @@ -1228,9 +1239,7 @@ public void deleteDocuments(Term term) throws CorruptIndexException, IOException { ensureOpen(); try { - if (docWriter.deleteTerm(term, false)) { - flush(true, false); - } + docWriter.deleteTerms(term); } catch (OutOfMemoryError oom) { handleOOM(oom, "deleteDocuments(Term)"); } @@ -1252,9 +1261,7 @@ public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException { ensureOpen(); try { - if (docWriter.deleteTerms(terms)) { - flush(true, false); - } + docWriter.deleteTerms(terms); } catch (OutOfMemoryError oom) { handleOOM(oom, "deleteDocuments(Term..)"); } @@ -1274,9 +1281,7 @@ public void deleteDocuments(Query query) throws CorruptIndexException, IOException { ensureOpen(); try { - if (docWriter.deleteQuery(query)) { - flush(true, false); - } + docWriter.deleteQueries(query); } catch (OutOfMemoryError oom) { handleOOM(oom, "deleteDocuments(Query)"); } @@ -1298,9 +1303,7 @@ public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException { ensureOpen(); try { - if (docWriter.deleteQueries(queries)) { - flush(true, false); - } + docWriter.deleteQueries(queries); } catch (OutOfMemoryError oom) { handleOOM(oom, "deleteDocuments(Query..)"); } @@ -1350,17 +1353,18 @@ throws CorruptIndexException, IOException { ensureOpen(); try { - boolean doFlush = false; boolean success = false; + boolean maybeMerge = false; try { - doFlush = docWriter.updateDocument(doc, analyzer, term); + maybeMerge = docWriter.updateDocument(doc, analyzer, term); success = true; } finally { if (!success && infoStream != null) message("hit exception updating document"); } - if (doFlush) { - flush(true, false); + + if (maybeMerge) { + maybeMerge(); } } catch (OutOfMemoryError oom) { handleOOM(oom, "updateDocument"); @@ -1546,7 +1550,7 @@ resetMergeExceptions(); segmentsToOptimize = new HashSet(segmentInfos); optimizeMaxNumSegments = maxNumSegments; - + // Now mark all pending & running merges as optimize // merge: for(final MergePolicy.OneMerge merge : pendingMerges) { @@ -1612,12 +1616,12 @@ if (merge.optimize) return true; } - + for (final MergePolicy.OneMerge merge : runningMerges) { if (merge.optimize) return true; } - + return false; } @@ -1914,7 +1918,7 @@ /** * Delete all documents in the index. 
* - *

This method will drop all buffered documents and will + *

This method will drop all buffered documents and will * remove all segments from the index. This change will not be * visible until a {@link #commit()} has been called. This method * can be rolled back using {@link #rollback()}.
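The deleteAll() contract spelled out in this javadoc (buffered documents dropped, nothing visible until commit(), reversible via rollback()) reads naturally as a small usage sketch; 'writer' is assumed to be an already-open IndexWriter and ClearIndexExample is an illustrative name.

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;

    final class ClearIndexExample {
      // 'writer' is assumed to be an already-open IndexWriter.
      static void clearIndex(IndexWriter writer, boolean keepChanges) throws IOException {
        writer.deleteAll();      // drops buffered docs and marks all segments for removal
        if (keepChanges) {
          writer.commit();       // the empty index only becomes visible to new readers here
        } else {
          writer.rollback();     // reverts the deleteAll(); note rollback() also closes the writer
        }
      }
    }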

@@ -1944,7 +1948,7 @@ deleter.refresh(); // Don't bother saving any changes in our segmentInfos - readerPool.clear(null); + readerPool.clear(null); // Mark that the index has changed ++changeCount; @@ -1971,7 +1975,7 @@ mergeFinish(merge); } pendingMerges.clear(); - + for (final MergePolicy.OneMerge merge : runningMerges) { if (infoStream != null) message("now abort running merge " + merge.segString(directory)); @@ -1998,7 +2002,7 @@ message("all running merges have aborted"); } else { - // waitForMerges() will ensure any running addIndexes finishes. + // waitForMerges() will ensure any running addIndexes finishes. // It's fine if a new one attempts to start because from our // caller above the call will see that we are in the // process of closing, and will throw an @@ -2010,7 +2014,7 @@ /** * Wait for any currently outstanding merges to finish. * - *

It is guaranteed that any merges started prior to calling this method + *

It is guaranteed that any merges started prior to calling this method * will have completed once this method completes.

*/ public synchronized void waitForMerges() { @@ -2040,6 +2044,125 @@ deleter.checkpoint(segmentInfos, false); } + /** + * Prepares the {@link SegmentInfo} for the new flushed segment and persists + * the deleted documents {@link BitVector}. Use + * {@link #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes)} to + * publish the returned {@link SegmentInfo} together with its segment private + * delete packet. + * + * @see #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes) + */ + SegmentInfo prepareFlushedSegment(FlushedSegment flushedSegment) throws IOException { + assert flushedSegment != null; + + SegmentInfo newSegment = flushedSegment.segmentInfo; + + setDiagnostics(newSegment, "flush"); + + boolean success = false; + try { + if (useCompoundFile(newSegment)) { + String compoundFileName = IndexFileNames.segmentFileName(newSegment.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + message("creating compound file " + compoundFileName); + // Now build compound file + CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName); + for(String fileName : newSegment.files()) { + cfsWriter.addFile(fileName); + } + + // Perform the merge + cfsWriter.close(); + synchronized(this) { + deleter.deleteNewFiles(newSegment.files()); + } + + newSegment.setUseCompoundFile(true); + } + + // Must write deleted docs after the CFS so we don't + // slurp the del file into CFS: + if (flushedSegment.deletedDocuments != null) { + final int delCount = flushedSegment.deletedDocuments.count(); + assert delCount > 0; + newSegment.setDelCount(delCount); + newSegment.advanceDelGen(); + final String delFileName = newSegment.getDelFileName(); + if (infoStream != null) { + message("flush: write " + delCount + " deletes to " + delFileName); + } + boolean success2 = false; + try { + // TODO: in the NRT case it'd be better to hand + // this del vector over to the + // shortly-to-be-opened SegmentReader and let it + // carry the changes; there's no reason to use + // filesystem as intermediary here. + flushedSegment.deletedDocuments.write(directory, delFileName); + success2 = true; + } finally { + if (!success2) { + try { + directory.deleteFile(delFileName); + } catch (Throwable t) { + // suppress this so we keep throwing the + // original exception + } + } + } + } + + success = true; + } finally { + if (!success) { + if (infoStream != null) { + message("hit exception " + + "reating compound file for newly flushed segment " + newSegment.name); + } + + synchronized(this) { + deleter.refresh(newSegment.name); + } + } + } + return newSegment; + } + + /** + * Atomically adds the segment private delete packet and publishes the flushed + * segments SegmentInfo to the index writer. NOTE: use + * {@link #prepareFlushedSegment(FlushedSegment)} to obtain the + * {@link SegmentInfo} for the flushed segment. + * + * @see #prepareFlushedSegment(FlushedSegment) + */ + synchronized void publishFlushedSegment(SegmentInfo newSegment, + FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) throws IOException { + // Lock order IW -> BDS + synchronized (bufferedDeletesStream) { + if (globalPacket != null && globalPacket.any()) { + bufferedDeletesStream.push(globalPacket); + } + // Publishing the segment must be synched on IW -> BDS to make the sure + // that no merge prunes away the seg. 
private delete packet + final long nextGen; + if (packet != null && packet.any()) { + nextGen = bufferedDeletesStream.push(packet); + } else { + // Since we don't have a delete packet to apply we can get a new + // generation right away + nextGen = bufferedDeletesStream.getNextGen(); + } + newSegment.setBufferedDeletesGen(nextGen); + segmentInfos.add(newSegment); + checkpoint(); + } + } + + synchronized boolean useCompoundFile(SegmentInfo segmentInfo) throws IOException { + return mergePolicy.useCompoundFile(segmentInfos, segmentInfo); + } + private synchronized void resetMergeExceptions() { mergeExceptions = new ArrayList(); mergeGen++; @@ -2088,11 +2211,11 @@ *

* NOTE: this method only copies the segments of the incoming indexes * and does not merge them. Therefore deleted documents are not removed and - * the new segments are not merged with the existing ones. Also, the segments - * are copied as-is, meaning they are not converted to CFS if they aren't, - * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} + * the new segments are not merged with the existing ones. Also, the segments + * are copied as-is, meaning they are not converted to CFS if they aren't, + * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} * or {@link #optimize} afterwards. - * + * *

This requires this index not be among those to be added. * *
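The addIndexes(Directory...) javadoc above notes that incoming segments are copied as-is, without merging or CFS conversion, and suggests calling maybeMerge() or optimize() afterwards. A short usage sketch under those assumptions; CopyIndexesExample is an illustrative name, and 'sources' must not include the writer's own directory.

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    final class CopyIndexesExample {
      static void copyIndexes(IndexWriter writer, Directory... sources) throws IOException {
        writer.addIndexes(sources); // copies the incoming segments as-is (no merging, no CFS change)
        writer.maybeMerge();        // optionally let the merge policy fold the copied segments in
      }
    }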

@@ -2129,7 +2252,7 @@ docCount += info.docCount; String newSegName = newSegmentName(); String dsName = info.getDocStoreSegment(); - + if (infoStream != null) { message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info); } @@ -2176,7 +2299,7 @@ infos.add(info); } - } + } synchronized (this) { ensureOpen(); @@ -2225,11 +2348,12 @@ SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, null, codecs, payloadProcessorProvider, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs))); - + for (IndexReader reader : readers) // add new indexes merger.add(reader); - + int docCount = merger.merge(); // merge 'em + final FieldInfos fieldInfos = merger.fieldInfos(); SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false, fieldInfos.hasProx(), merger.getSegmentCodecs(), @@ -2241,11 +2365,11 @@ synchronized(this) { // Guard segmentInfos useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info); } - + // Now create the compound file if needed if (useCompoundFile) { merger.createCompoundFile(mergedName + ".cfs", info); - + // delete new non cfs files directly: they were never // registered with IFD deleter.deleteNewFiles(info.files()); @@ -2297,7 +2421,7 @@ * #commit()} to finish the commit, or {@link * #rollback()} to revert the commit and undo all changes * done since the writer was opened.
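The prepareCommit() javadoc here describes a two-phase commit: do the expensive flush/sync work first, then either commit() to publish or rollback() to discard it. A hedged usage sketch of that split; TwoPhaseCommitExample and commitSideEffects are illustrative names, the latter standing in for whatever external resource should commit together with the index.

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;

    final class TwoPhaseCommitExample {
      static void commitBoth(IndexWriter writer, Runnable commitSideEffects) throws IOException {
        writer.prepareCommit();     // expensive phase: flush and sync, nothing visible yet
        try {
          commitSideEffects.run();  // commit the external state between the two phases
          writer.commit();          // cheap phase: publish the already-prepared commit
        } catch (RuntimeException e) {
          writer.rollback();        // abandon the prepared commit (rollback() also closes the writer)
          throw e;
        }
      }
    }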

- * + * * You can also just call {@link #commit(Map)} directly * without prepareCommit first in which case that method * will internally call prepareCommit. @@ -2441,6 +2565,10 @@ } } + // Ensures only one flush() is actually flushing segments + // at a time: + private final Object fullFlushLock = new Object(); + /** * Flush all in-memory buffered updates (adds and deletes) * to the Directory. @@ -2464,117 +2592,105 @@ } } - // TODO: this method should not have to be entirely - // synchronized, ie, merges should be allowed to commit - // even while a flush is happening - private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException { - + private boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException { if (hitOOM) { throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush"); } doBeforeFlush(); - assert testPoint("startDoFlush"); - - // We may be flushing because it was triggered by doc - // count, del count, ram usage (in which case flush - // pending is already set), or we may be flushing - // due to external event eg getReader or commit is - // called (in which case we now set it, and this will - // pause all threads): - flushControl.setFlushPendingNoWait("explicit flush"); - boolean success = false; - try { if (infoStream != null) { message(" start flush: applyAllDeletes=" + applyAllDeletes); message(" index before flush " + segString()); } - - final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos); - if (newSegment != null) { - setDiagnostics(newSegment, "flush"); - segmentInfos.add(newSegment); - checkpoint(); - } - - if (!applyAllDeletes) { - // If deletes alone are consuming > 1/2 our RAM - // buffer, force them all to apply now. 
This is to - // prevent too-frequent flushing of a long tail of - // tiny segments: - if (flushControl.getFlushDeletes() || - (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH && - bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) { - applyAllDeletes = true; - if (infoStream != null) { - message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB())); - } + final boolean maybeMerge; + + synchronized (fullFlushLock) { + try { + maybeMerge = docWriter.flushAllThreads(applyAllDeletes); + success = true; + } finally { + docWriter.finishFullFlush(success); } } - - if (applyAllDeletes) { - if (infoStream != null) { - message("apply all deletes during flush"); + success = false; + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + doAfterFlush(); + if (!maybeMerge) { + // flushCount is incremented in flushAllThreads + flushCount.incrementAndGet(); } - flushDeletesCount.incrementAndGet(); - final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos); - if (result.anyDeletes) { - checkpoint(); - } - if (!keepFullyDeletedSegments && result.allDeleted != null) { - if (infoStream != null) { - message("drop 100% deleted segments: " + result.allDeleted); - } - for(SegmentInfo info : result.allDeleted) { - // If a merge has already registered for this - // segment, we leave it in the readerPool; the - // merge will skip merging it and will then drop - // it once it's done: - if (!mergingSegments.contains(info)) { - segmentInfos.remove(info); - if (readerPool != null) { - readerPool.drop(info); - } - } - } - checkpoint(); - } - bufferedDeletesStream.prune(segmentInfos); - assert !bufferedDeletesStream.any(); - flushControl.clearDeletes(); - } else if (infoStream != null) { - message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + success = true; + return maybeMerge; } - - doAfterFlush(); - flushCount.incrementAndGet(); - - success = true; - - return newSegment != null; - } catch (OutOfMemoryError oom) { handleOOM(oom, "doFlush"); // never hit return false; } finally { - flushControl.clearFlushPending(); if (!success && infoStream != null) message("hit exception during flush"); } } + + final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException { + if (applyAllDeletes) { + if (infoStream != null) { + message("apply all deletes during flush"); + } + applyAllDeletes(); + } else if (infoStream != null) { + message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + } + } + + final synchronized void applyAllDeletes() throws IOException { + flushDeletesCount.incrementAndGet(); + final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream + .applyDeletes(readerPool, segmentInfos); + if (result.anyDeletes) { + checkpoint(); + } + if (!keepFullyDeletedSegments && result.allDeleted != null) { + if (infoStream != null) { + message("drop 100% deleted segments: " + result.allDeleted); + } + for (SegmentInfo info : result.allDeleted) { + // If a merge has already registered for this + // segment, we leave it in the readerPool; the + // merge will skip merging it and will then drop + // it once it's done: + if (!mergingSegments.contains(info)) { + segmentInfos.remove(info); + if (readerPool != null) { + readerPool.drop(info); + } 
+ } + } + checkpoint(); + } + bufferedDeletesStream.prune(segmentInfos); + } + /** Expert: Return the total size of all index files currently cached in memory. * Useful for size management with flushRamDocs() */ public final long ramSizeInBytes() { ensureOpen(); - return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed(); + return docWriter.flushControl.netBytes() + bufferedDeletesStream.bytesUsed(); } + + // for testing only + DocumentsWriter getDocsWriter() { + boolean test = false; + assert test = true; + return test?docWriter: null; + } /** Expert: Return the number of documents currently * buffered in RAM. */ @@ -2709,7 +2825,7 @@ } commitMergedDeletes(merge, mergedReader); - + // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound @@ -2723,7 +2839,7 @@ message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert")); } - final Set mergedAway = new HashSet(merge.segments); + final Set mergedAway = new HashSet(merge.segments); int segIdx = 0; int newSegIdx = 0; boolean inserted = false; @@ -2770,15 +2886,15 @@ // them so that they don't bother writing them to // disk, updating SegmentInfo, etc.: readerPool.clear(merge.segments); - + if (merge.optimize) { // cascade the optimize: segmentsToOptimize.add(merge.info); } - + return true; } - + final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException { if (infoStream != null) { @@ -2867,7 +2983,7 @@ /** Hook that's called when the specified merge is complete. */ void mergeSuccess(MergePolicy.OneMerge merge) { } - + /** Checks whether this merge involves any segments * already participating in a merge. If not, this merge * is "registered", meaning we record that its segments @@ -2998,7 +3114,6 @@ // Lock order: IW -> BD bufferedDeletesStream.prune(segmentInfos); - Map details = new HashMap(); details.put("optimize", Boolean.toString(merge.optimize)); details.put("mergeFactor", Integer.toString(merge.segments.size())); @@ -3019,11 +3134,11 @@ mergingSegments.add(merge.info); } - private void setDiagnostics(SegmentInfo info, String source) { + static void setDiagnostics(SegmentInfo info, String source) { setDiagnostics(info, source, null); } - private void setDiagnostics(SegmentInfo info, String source, Map details) { + private static void setDiagnostics(SegmentInfo info, String source, Map details) { Map diagnostics = new HashMap(); diagnostics.put("source", source); diagnostics.put("lucene.version", Constants.LUCENE_VERSION); @@ -3041,7 +3156,7 @@ /** Does fininishing for a merge, which is fast but holds * the synchronized lock on IndexWriter instance. */ final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException { - + // Optimize, addIndexes or finishMerges may be waiting // on merges to finish. 
notifyAll(); @@ -3113,11 +3228,11 @@ * instance */ private int mergeMiddle(MergePolicy.OneMerge merge) throws CorruptIndexException, IOException { - + merge.checkAborted(directory); final String mergedName = merge.info.name; - + int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; @@ -3191,7 +3306,7 @@ message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size()); } anyNonBulkMerges |= merger.getAnyNonBulkMerges(); - + assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount; // Very important to do this before opening the reader @@ -3325,12 +3440,12 @@ // For test purposes. final int getBufferedDeleteTermsSize() { - return docWriter.getPendingDeletes().terms.size(); + return docWriter.getBufferedDeleteTermsSize(); } // For test purposes. final int getNumBufferedDeleteTerms() { - return docWriter.getPendingDeletes().numTermDeletes.get(); + return docWriter.getNumBufferedDeleteTerms(); } // utility routines for tests @@ -3445,17 +3560,17 @@ assert lastCommitChangeCount <= changeCount; myChangeCount = changeCount; - + if (changeCount == lastCommitChangeCount) { if (infoStream != null) message(" skip startCommit(): no changes pending"); return; } - + // First, we clone & incref the segmentInfos we intend // to sync, then, without locking, we sync() all files // referenced by toSync, in the background. - + if (infoStream != null) message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount); @@ -3463,10 +3578,10 @@ toSync = (SegmentInfos) segmentInfos.clone(); assert filesExist(toSync); - + if (commitUserData != null) toSync.setUserData(commitUserData); - + // This protects the segmentInfos we are now going // to commit. This is important in case, eg, while // we are trying to sync all referenced files, a @@ -3598,7 +3713,7 @@ /** Expert: remove any index files that are no longer * used. - * + * *

IndexWriter normally deletes unused files itself, * during indexing. However, on Windows, which disallows * deletion of open files, if there is a reader open on @@ -3647,7 +3762,7 @@ public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) { payloadProcessorProvider = pcp; } - + /** * Returns the {@link PayloadProcessorProvider} that is used during segment * merges to process payloads. @@ -3655,124 +3770,4 @@ public PayloadProcessorProvider getPayloadProcessorProvider() { return payloadProcessorProvider; } - - // decides when flushes happen - final class FlushControl { - - private boolean flushPending; - private boolean flushDeletes; - private int delCount; - private int docCount; - private boolean flushing; - - private synchronized boolean setFlushPending(String reason, boolean doWait) { - if (flushPending || flushing) { - if (doWait) { - while(flushPending || flushing) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - } - return false; - } else { - if (infoStream != null) { - message("now trigger flush reason=" + reason); - } - flushPending = true; - return flushPending; - } - } - - public synchronized void setFlushPendingNoWait(String reason) { - setFlushPending(reason, false); - } - - public synchronized boolean getFlushPending() { - return flushPending; - } - - public synchronized boolean getFlushDeletes() { - return flushDeletes; - } - - public synchronized void clearFlushPending() { - if (infoStream != null) { - message("clearFlushPending"); - } - flushPending = false; - flushDeletes = false; - docCount = 0; - notifyAll(); - } - - public synchronized void clearDeletes() { - delCount = 0; - } - - public synchronized boolean waitUpdate(int docInc, int delInc) { - return waitUpdate(docInc, delInc, false); - } - - public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) { - while(flushPending) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - - // skipWait is only used when a thread is BOTH adding - // a doc and buffering a del term, and, the adding of - // the doc already triggered a flush - if (skipWait) { - docCount += docInc; - delCount += delInc; - return false; - } - - final int maxBufferedDocs = config.getMaxBufferedDocs(); - if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH && - (docCount+docInc) >= maxBufferedDocs) { - return setFlushPending("maxBufferedDocs", true); - } - docCount += docInc; - - final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms(); - if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH && - (delCount+delInc) >= maxBufferedDeleteTerms) { - flushDeletes = true; - return setFlushPending("maxBufferedDeleteTerms", true); - } - delCount += delInc; - - return flushByRAMUsage("add delete/doc"); - } - - public synchronized boolean flushByRAMUsage(String reason) { - final double ramBufferSizeMB = config.getRAMBufferSizeMB(); - if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) { - final long limit = (long) (ramBufferSizeMB*1024*1024); - long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); - if (used >= limit) { - - // DocumentsWriter may be able to free up some - // RAM: - // Lock order: FC -> DW - docWriter.balanceRAM(); - - used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); - if (used >= limit) { - return setFlushPending("ram full: " + reason, false); - } - } - } - return false; - } - } - - final FlushControl flushControl = new 
FlushControl(); } Index: lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java (working copy) @@ -1,25 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -abstract class InvertedDocEndConsumerPerThread { - abstract void startDocument(); - abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - abstract void finishDocument(); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java (working copy) @@ -20,12 +20,13 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; import java.io.IOException; final class TermVectorsWriter { - + private IndexOutput tvx = null, tvd = null, tvf = null; private FieldInfos fieldInfos; @@ -46,7 +47,7 @@ /** * Add a complete document specified by all its term vectors. If document has no * term vectors, add value for tvx. - * + * * @param vectors * @throws IOException */ @@ -99,7 +100,7 @@ final int[] freqs = vectors[i].getTermFrequencies(); for (int j=0; j deletable; - /* Reference count for all files in the index. + /* Reference count for all files in the index. * Counts how many existing commits reference a file. 
**/ private Map refCounts = new HashMap(); @@ -88,7 +94,7 @@ * non-commit checkpoint: */ private List> lastFiles = new ArrayList>(); - /* Commits that the IndexDeletionPolicy have decided to delete: */ + /* Commits that the IndexDeletionPolicy have decided to delete: */ private List commitsToDelete = new ArrayList(); private PrintStream infoStream; @@ -108,7 +114,7 @@ message("setInfoStream deletionPolicy=" + policy); } } - + private void message(String message) { infoStream.println("IFD [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); } @@ -139,12 +145,12 @@ // counts: long currentGen = segmentInfos.getGeneration(); indexFilenameFilter = new IndexFileNameFilter(codecs); - + CommitPoint currentCommitPoint = null; String[] files = null; try { files = directory.listAll(); - } catch (NoSuchDirectoryException e) { + } catch (NoSuchDirectoryException e) { // it means the directory is empty, so ignore it. files = new String[0]; } @@ -152,7 +158,7 @@ for (String fileName : files) { if ((indexFilenameFilter.accept(null, fileName)) && !fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) { - + // Add this file to refCounts with initial count 0: getRefCount(fileName); @@ -233,7 +239,7 @@ // Now delete anything with ref count at 0. These are // presumably abandoned files eg due to crash of // IndexWriter. - for(Map.Entry entry : refCounts.entrySet() ) { + for(Map.Entry entry : refCounts.entrySet() ) { RefCount rc = entry.getValue(); final String fileName = entry.getKey(); if (0 == rc.count) { @@ -253,7 +259,7 @@ // Always protect the incoming segmentInfos since // sometime it may not be the most recent commit checkpoint(segmentInfos, false); - + startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted(); deleteCommits(); @@ -327,7 +333,7 @@ segmentPrefix1 = null; segmentPrefix2 = null; } - + for(int i=0;i oldDeletable = deletable; @@ -397,7 +403,7 @@ /** * For definition of "check point" see IndexWriter comments: * "Clarification: Check Points (and commits)". - * + * * Writer calls this when it has made a "consistent * change" to the index, meaning new files are written to * the index and the in-memory SegmentInfos have been @@ -417,7 +423,7 @@ public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException { if (infoStream != null) { - message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); + message("now checkpoint \"" + segmentInfos + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable Index: lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java (working copy) @@ -1,45 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -final class FreqProxTermsWriterPerThread extends TermsHashConsumerPerThread { - final TermsHashPerThread termsHashPerThread; - final DocumentsWriter.DocState docState; - - public FreqProxTermsWriterPerThread(TermsHashPerThread perThread) { - docState = perThread.docState; - termsHashPerThread = perThread; - } - - @Override - public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { - return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); - } - - @Override - void startDocument() { - } - - @Override - DocumentsWriter.DocWriter finishDocument() { - return null; - } - - @Override - public void abort() {} -} Index: lucene/src/java/org/apache/lucene/index/SegmentWriteState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (working copy) @@ -65,7 +65,7 @@ this.segmentCodecs = segmentCodecs; codecId = ""; } - + /** * Create a shallow {@link SegmentWriteState} copy final a codec ID */ Index: lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -abstract class InvertedDocConsumerPerThread { - abstract void startDocument() throws IOException; - abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/DocConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocConsumer.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocConsumer.java (working copy) @@ -18,11 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; abstract class DocConsumer { - abstract DocConsumerPerThread addThread(DocumentsWriterThreadState perThread) throws IOException; - abstract void flush(final Collection threads, final SegmentWriteState state) throws IOException; + abstract void processDocument(FieldInfos fieldInfos) throws IOException; + abstract void finishDocument() throws IOException; + abstract void flush(final SegmentWriteState state) throws IOException; abstract void abort(); abstract boolean freeRAM(); + abstract void doAfterFlush(); } Index: lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java (working copy) @@ -1,92 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - -/** This is a DocFieldConsumer that inverts each field, - * separately, from a Document, and accepts a - * InvertedTermsConsumer to process those terms. 
*/ - -final class DocInverterPerThread extends DocFieldConsumerPerThread { - final DocInverter docInverter; - final InvertedDocConsumerPerThread consumer; - final InvertedDocEndConsumerPerThread endConsumer; - final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource(); - - static class SingleTokenAttributeSource extends AttributeSource { - final CharTermAttribute termAttribute; - final OffsetAttribute offsetAttribute; - - private SingleTokenAttributeSource() { - termAttribute = addAttribute(CharTermAttribute.class); - offsetAttribute = addAttribute(OffsetAttribute.class); - } - - public void reinit(String stringValue, int startOffset, int endOffset) { - termAttribute.setEmpty().append(stringValue); - offsetAttribute.setOffset(startOffset, endOffset); - } - } - - final DocumentsWriter.DocState docState; - - final FieldInvertState fieldState = new FieldInvertState(); - - // Used to read a string value for a field - final ReusableStringReader stringReader = new ReusableStringReader(); - - public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter) { - this.docInverter = docInverter; - docState = docFieldProcessorPerThread.docState; - consumer = docInverter.consumer.addThread(this); - endConsumer = docInverter.endConsumer.addThread(this); - } - - @Override - public void startDocument() throws IOException { - consumer.startDocument(); - endConsumer.startDocument(); - } - - @Override - public DocumentsWriter.DocWriter finishDocument() throws IOException { - // TODO: allow endConsumer.finishDocument to also return - // a DocWriter - endConsumer.finishDocument(); - return consumer.finishDocument(); - } - - @Override - void abort() { - try { - consumer.abort(); - } finally { - endConsumer.abort(); - } - } - - @Override - public DocFieldConsumerPerField addField(FieldInfo fi) { - return new DocInverterPerField(this, fi); - } -} Index: lucene/src/java/org/apache/lucene/index/DocInverter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverter.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/DocInverter.java (working copy) @@ -18,13 +18,14 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; - import java.util.Map; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util.AttributeSource; + /** This is a DocFieldConsumer that inverts each field, * separately, from a Document, and accepts a * InvertedTermsConsumer to process those terms. 
*/ @@ -34,42 +35,72 @@ final InvertedDocConsumer consumer; final InvertedDocEndConsumer endConsumer; - public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) { + final DocumentsWriterPerThread.DocState docState; + + final FieldInvertState fieldState = new FieldInvertState(); + + final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource(); + + static class SingleTokenAttributeSource extends AttributeSource { + final CharTermAttribute termAttribute; + final OffsetAttribute offsetAttribute; + + private SingleTokenAttributeSource() { + termAttribute = addAttribute(CharTermAttribute.class); + offsetAttribute = addAttribute(OffsetAttribute.class); + } + + public void reinit(String stringValue, int startOffset, int endOffset) { + termAttribute.setEmpty().append(stringValue); + offsetAttribute.setOffset(startOffset, endOffset); + } + } + + // Used to read a string value for a field + final ReusableStringReader stringReader = new ReusableStringReader(); + + public DocInverter(DocumentsWriterPerThread.DocState docState, InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) { + this.docState = docState; this.consumer = consumer; this.endConsumer = endConsumer; } @Override - void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException { + void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - Map> endChildThreadsAndFields = new HashMap>(); + Map childFieldsToFlush = new HashMap(); + Map endChildFieldsToFlush = new HashMap(); - for (Map.Entry> entry : threadsAndFields.entrySet() ) { + for (Map.Entry fieldToFlush : fieldsToFlush.entrySet()) { + DocInverterPerField perField = (DocInverterPerField) fieldToFlush.getValue(); + childFieldsToFlush.put(fieldToFlush.getKey(), perField.consumer); + endChildFieldsToFlush.put(fieldToFlush.getKey(), perField.endConsumer); + } + consumer.flush(childFieldsToFlush, state); + endConsumer.flush(endChildFieldsToFlush, state); + } - DocInverterPerThread perThread = (DocInverterPerThread) entry.getKey(); + @Override + public void startDocument() throws IOException { + consumer.startDocument(); + endConsumer.startDocument(); + } - Collection childFields = new HashSet(); - Collection endChildFields = new HashSet(); - for (final DocFieldConsumerPerField field: entry.getValue() ) { - DocInverterPerField perField = (DocInverterPerField) field; - childFields.add(perField.consumer); - endChildFields.add(perField.endConsumer); - } - - childThreadsAndFields.put(perThread.consumer, childFields); - endChildThreadsAndFields.put(perThread.endConsumer, endChildFields); - } - - consumer.flush(childThreadsAndFields, state); - endConsumer.flush(endChildThreadsAndFields, state); + public void finishDocument() throws IOException { + // TODO: allow endConsumer.finishDocument to also return + // a DocWriter + endConsumer.finishDocument(); + consumer.finishDocument(); } @Override void abort() { - consumer.abort(); - endConsumer.abort(); + try { + consumer.abort(); + } finally { + endConsumer.abort(); + } } @Override @@ -78,7 +109,8 @@ } @Override - public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) { - return new DocInverterPerThread(docFieldProcessorPerThread, this); + public DocFieldConsumerPerField addField(FieldInfo fi) { + return new DocInverterPerField(this, fi); } + } Index: lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java 
=================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java (working copy) @@ -1,89 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.BytesRef; - -final class TermVectorsTermsWriterPerThread extends TermsHashConsumerPerThread { - - final TermVectorsTermsWriter termsWriter; - final TermsHashPerThread termsHashPerThread; - final DocumentsWriter.DocState docState; - final BytesRef flushTerm = new BytesRef(); - - TermVectorsTermsWriter.PerDoc doc; - - public TermVectorsTermsWriterPerThread(TermsHashPerThread termsHashPerThread, TermVectorsTermsWriter termsWriter) { - this.termsWriter = termsWriter; - this.termsHashPerThread = termsHashPerThread; - docState = termsHashPerThread.docState; - } - - // Used by perField when serializing the term vectors - final ByteSliceReader vectorSliceReader = new ByteSliceReader(); - - @Override - public void startDocument() { - assert clearLastVectorFieldName(); - if (doc != null) { - doc.reset(); - doc.docID = docState.docID; - } - } - - @Override - public DocumentsWriter.DocWriter finishDocument() { - try { - return doc; - } finally { - doc = null; - } - } - - @Override - public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { - return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo); - } - - @Override - public void abort() { - if (doc != null) { - doc.abort(); - doc = null; - } - } - - // Called only by assert - final boolean clearLastVectorFieldName() { - lastVectorFieldName = null; - return true; - } - - // Called only by assert - String lastVectorFieldName; - final boolean vectorFieldsInOrder(FieldInfo fi) { - try { - if (lastVectorFieldName != null) - return lastVectorFieldName.compareTo(fi.name) < 0; - else - return true; - } finally { - lastVectorFieldName = fi.name; - } - } -} Index: lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (working copy) @@ -18,7 +18,7 @@ */ import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.DocumentsWriter.IndexingChain; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.search.IndexSearcher; @@ -41,7 +41,7 @@ * IndexWriterConfig conf = 
new IndexWriterConfig(analyzer); * conf.setter1().setter2(); * - * + * * @since 3.1 */ public final class IndexWriterConfig implements Cloneable { @@ -56,7 +56,7 @@ * */ public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND } - + /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */ public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here @@ -77,7 +77,7 @@ /** * Default value for the write lock timeout (1,000 ms). - * + * * @see #setDefaultWriteLockTimeout(long) */ public static long WRITE_LOCK_TIMEOUT = 1000; @@ -94,6 +94,8 @@ /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */ public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR; + /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */ + public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945; /** * Sets the default (for any instance) maximum time to wait for a write lock * (in milliseconds). @@ -105,7 +107,7 @@ /** * Returns the default write lock timeout for newly instantiated * IndexWriterConfigs. - * + * * @see #setDefaultWriteLockTimeout(long) */ public static long getDefaultWriteLockTimeout() { @@ -127,10 +129,12 @@ private volatile IndexReaderWarmer mergedSegmentWarmer; private volatile CodecProvider codecProvider; private volatile MergePolicy mergePolicy; - private volatile int maxThreadStates; + private volatile DocumentsWriterPerThreadPool indexerThreadPool; private volatile boolean readerPooling; private volatile int readerTermsIndexDivisor; - + private volatile FlushPolicy flushPolicy; + private volatile int perThreadHardLimitMB; + private Version matchVersion; /** @@ -153,15 +157,16 @@ maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB; maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS; - indexingChain = DocumentsWriter.defaultIndexingChain; + indexingChain = DocumentsWriterPerThread.defaultIndexingChain; mergedSegmentWarmer = null; codecProvider = CodecProvider.getDefault(); mergePolicy = new TieredMergePolicy(); - maxThreadStates = DEFAULT_MAX_THREAD_STATES; readerPooling = DEFAULT_READER_POOLING; + indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(DEFAULT_MAX_THREAD_STATES); readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR; + perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; } - + @Override public Object clone() { // Shallow clone is the only thing that's possible, since parameters like @@ -186,7 +191,7 @@ this.openMode = openMode; return this; } - + /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */ public OpenMode getOpenMode() { return openMode; @@ -261,7 +266,7 @@ public SimilarityProvider getSimilarityProvider() { return similarityProvider; } - + /** * Expert: set the interval between indexed terms. Large values cause less * memory to be used by IndexReader, but slow random-access to terms. Small @@ -281,7 +286,7 @@ * In particular, numUniqueTerms/interval terms are read into * memory by an IndexReader, and, on average, interval/2 terms * must be scanned for each random term access. - * + * * @see #DEFAULT_TERM_INDEX_INTERVAL * *
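As a worked example of the trade-off described above (figures illustrative, not from the patch): with roughly 10,000,000 unique terms, the default interval of 32 keeps about 312,500 index terms in memory and scans about 16 terms per random lookup, while an interval of 128 keeps about 78,125 resident at the cost of scanning about 64. A hedged sketch, assuming the application supplies its own Version and Analyzer:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;

// Illustrative sketch only: trade index-term RAM for per-lookup scan cost.
final class TermIndexIntervalSketch {
  static IndexWriterConfig sparseTermIndex(Version matchVersion, Analyzer analyzer) {
    // interval 32  -> ~numUniqueTerms/32 index terms resident, ~16 scanned per random access
    // interval 128 -> ~numUniqueTerms/128 resident,            ~64 scanned per random access
    return new IndexWriterConfig(matchVersion, analyzer).setTermIndexInterval(128);
  }
}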

Takes effect immediately, but only applies to newly @@ -293,7 +298,7 @@ /** * Returns the interval between indexed terms. - * + * * @see #setTermIndexInterval(int) */ public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here @@ -331,10 +336,10 @@ this.writeLockTimeout = writeLockTimeout; return this; } - + /** * Returns allowed timeout when acquiring the write lock. - * + * * @see #setWriteLockTimeout(long) */ public long getWriteLockTimeout() { @@ -348,10 +353,11 @@ * created. *

Disabled by default (writer flushes by RAM usage). - * + * * @throws IllegalArgumentException if maxBufferedDeleteTerms * is enabled but smaller than 1 * @see #setRAMBufferSizeMB + * @see #setFlushPolicy(FlushPolicy) * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. @@ -368,7 +374,7 @@ /** * Returns the number of buffered deleted terms that will trigger a flush if * enabled. - * + * * @see #setMaxBufferedDeleteTerms(int) */ public int getMaxBufferedDeleteTerms() { @@ -380,45 +386,47 @@ * and deletions before they are flushed to the Directory. Generally for * faster indexing performance it's best to flush by RAM usage instead of * document count and use as large a RAM buffer as you can. - * *

* When this is set, the writer will flush whenever buffered documents and * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent * triggering a flush due to RAM usage. Note that if flushing by document * count is also enabled, then the flush will be triggered by whichever comes * first. - * *

+ * The maximum RAM limit is inherently determined by the JVM's available memory. + * Yet, an {@link IndexWriter} session can consume a significantly larger amount + * of memory than the given RAM limit since this limit is just an indicator of when + * to flush memory-resident documents to the Directory. Flushes are likely to happen + * concurrently while other threads are adding documents to the writer. For application + * stability, the available memory in the JVM should be significantly larger than + * the RAM buffer used for indexing. +

* NOTE: the account of RAM usage for pending deletions is only * approximate. Specifically, if you delete by Query, Lucene currently has no * way to measure the RAM usage of individual Queries so the accounting will * under-estimate and you should compensate by either calling commit() * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)} - * to flush by count instead of RAM usage (each buffered delete Query counts + * to flush by count instead of RAM usage (each buffered delete Query counts * as one). - * *

- * NOTE: because IndexWriter uses ints when managing its - * internal storage, the absolute maximum value for this setting is somewhat - * less than 2048 MB. The precise limit depends on various factors, such as - * how large your documents are, how many fields have norms, etc., so it's - * best to set this value comfortably under 2048. + * NOTE: It's not guaranteed that all memory-resident documents are flushed + * once this limit is exceeded. Depending on the configured {@link FlushPolicy}, only a + * subset of the buffered documents is flushed and therefore only part of the RAM + * buffer is released. +

* - *

* The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. - * + * @see #setFlushPolicy(FlushPolicy) + * *
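By way of illustration only (not part of the patch), a sketch of flushing purely by RAM usage under the new code, where the old 2048 MB cap on ramBufferSizeMB no longer applies and is superseded by the per-thread hard limit introduced later in this file; the Version, Analyzer and Directory are assumed to be supplied by the application:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

// Illustrative sketch only: flush by RAM usage, leave doc-count flushing disabled.
final class RamFlushSketch {
  static IndexWriter openWriter(Directory dir, Version matchVersion, Analyzer analyzer) throws IOException {
    IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer)
        .setRAMBufferSizeMB(256.0)                                  // flush once buffered docs/deletes reach ~256 MB
        .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // do not also flush by document count
    // Keep plenty of JVM headroom: flushes may run concurrently while other threads keep indexing.
    return new IndexWriter(dir, conf);
  }
}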

Takes effect immediately, but only the next time a * document is added, updated or deleted. * * @throws IllegalArgumentException * if ramBufferSize is enabled but non-positive, or it disables * ramBufferSize when maxBufferedDocs is already disabled + * */ public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { - if (ramBufferSizeMB > 2048.0) { - throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB - + " is too large; should be comfortably less than 2048"); - } if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) throw new IllegalArgumentException( "ramBufferSize should be > 0.0 MB when enabled"); @@ -438,22 +446,22 @@ * Determines the minimal number of documents required before the buffered * in-memory documents are flushed as a new Segment. Large values generally * give faster indexing. - * + * *

* When this is set, the writer will flush every maxBufferedDocs added * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a * flush due to number of buffered documents. Note that if flushing by RAM * usage is also enabled, then the flush will be triggered by whichever comes * first. - * + * *

* Disabled by default (writer flushes by RAM usage). - * + * *
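Conversely, for illustration only, flushing by document count with RAM-based flushing disabled; note the ordering, since disabling the RAM trigger is only legal once doc-count flushing is enabled (Version and Analyzer again assumed supplied by the application):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;

// Illustrative sketch only: flush every N buffered documents instead of by RAM usage.
final class DocCountFlushSketch {
  static IndexWriterConfig flushEvery(int docs, Version matchVersion, Analyzer analyzer) {
    IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
    conf.setMaxBufferedDocs(docs);                                  // enable doc-count flushing first...
    conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // ...then the RAM trigger may be disabled
    return conf;
  }
}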

Takes effect immediately, but only the next time a * document is added, updated or deleted. * * @see #setRAMBufferSizeMB(double) - * + * @see #setFlushPolicy(FlushPolicy) * @throws IllegalArgumentException * if maxBufferedDocs is enabled but smaller than 2, or it disables * maxBufferedDocs when ramBufferSize is already disabled @@ -473,7 +481,7 @@ /** * Returns the number of buffered added documents that will trigger a flush if * enabled. - * + * * @see #setMaxBufferedDocs(int) */ public int getMaxBufferedDocs() { @@ -519,10 +527,10 @@ return codecProvider; } - + /** * Returns the current MergePolicy in use by this writer. - * + * * @see #setMergePolicy(MergePolicy) */ public MergePolicy getMergePolicy() { @@ -536,15 +544,19 @@ * {@link #DEFAULT_MAX_THREAD_STATES}. * *

Only takes effect when IndexWriter is first created. */ - public IndexWriterConfig setMaxThreadStates(int maxThreadStates) { - this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates; + public IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) { + this.indexerThreadPool = threadPool; return this; } + public DocumentsWriterPerThreadPool getIndexerThreadPool() { + return this.indexerThreadPool; + } + /** Returns the max number of simultaneous threads that * may be indexing documents at once in IndexWriter. */ public int getMaxThreadStates() { - return maxThreadStates; + return indexerThreadPool.getMaxThreadStates(); } /** By default, IndexWriter does not pool the @@ -572,10 +584,10 @@ * *

Only takes effect when IndexWriter is first created. */ IndexWriterConfig setIndexingChain(IndexingChain indexingChain) { - this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain; + this.indexingChain = indexingChain == null ? DocumentsWriterPerThread.defaultIndexingChain : indexingChain; return this; } - + /** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */ IndexingChain getIndexingChain() { return indexingChain; } @@ -604,6 +616,53 @@ return readerTermsIndexDivisor; } + /** + * Expert: Controls when segments are flushed to disk during indexing. + * The {@link FlushPolicy} is initialized during {@link IndexWriter} instantiation; once initialized, + * the given instance is bound to this {@link IndexWriter} and should not be used with another writer. + * @see #setMaxBufferedDeleteTerms(int) + * @see #setMaxBufferedDocs(int) + * @see #setRAMBufferSizeMB(double) + */ + public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) { + this.flushPolicy = flushPolicy; + return this; + } + + /** + * Expert: Sets the maximum memory consumption per thread, triggering a forced + * flush if exceeded. A {@link DocumentsWriterPerThread} is forcefully flushed + * once it exceeds this limit even if the {@link #getRAMBufferSizeMB()} has + * not been exceeded. This is a safety limit to prevent a + * {@link DocumentsWriterPerThread} from address space exhaustion due to its + * internal 32-bit signed integer based memory addressing. + * The given value must be less than 2 GB (2048 MB). + * + * @see #DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB + */ + public IndexWriterConfig setRAMPerThreadHardLimitMB(int perThreadHardLimitMB) { + if (perThreadHardLimitMB <= 0 || perThreadHardLimitMB >= 2048) { + throw new IllegalArgumentException("PerThreadHardLimit must be greater than 0 and less than 2048MB"); + } + this.perThreadHardLimitMB = perThreadHardLimitMB; + return this; + } + + /** + * Returns the max amount of memory each {@link DocumentsWriterPerThread} can + * consume until forcefully flushed.
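To tie the new knobs together, an illustrative sketch only (values and the helper's name are not from the patch) combining the indexer thread pool, the global RAM trigger and the per-thread hard limit introduced here; the FlushPolicy is left at its default:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.ThreadAffinityDocumentsWriterThreadPool;
import org.apache.lucene.util.Version;

// Illustrative sketch only: wire up the per-thread indexing settings added by this patch.
final class PerThreadLimitsSketch {
  static IndexWriterConfig concurrentIndexingConfig(Version matchVersion, Analyzer analyzer) {
    return new IndexWriterConfig(matchVersion, analyzer)
        .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(8)) // up to 8 DocumentsWriterPerThread states
        .setRAMBufferSizeMB(512.0)          // global trigger, enforced by the default FlushPolicy
        .setRAMPerThreadHardLimitMB(1024);  // per-DWPT safety cap; must be > 0 and < 2048
  }
}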
+ * @see #setRAMPerThreadHardLimitMB(int) + */ + public int getRAMPerThreadHardLimitMB() { + return perThreadHardLimitMB; + } + /** + * @see #setFlushPolicy(FlushPolicy) + */ + public FlushPolicy getFlushPolicy() { + return flushPolicy; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -623,9 +682,14 @@ sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n"); sb.append("codecProvider=").append(codecProvider).append("\n"); sb.append("mergePolicy=").append(mergePolicy).append("\n"); - sb.append("maxThreadStates=").append(maxThreadStates).append("\n"); + sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n"); + sb.append("maxThreadStates=").append(indexerThreadPool.getMaxThreadStates()).append("\n"); sb.append("readerPooling=").append(readerPooling).append("\n"); sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n"); + sb.append("flushPolicy=").append(flushPolicy).append("\n"); + sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n"); + return sb.toString(); } + } Index: lucene/src/java/org/apache/lucene/index/NormsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/NormsWriter.java (revision 1097270) +++ lucene/src/java/org/apache/lucene/index/NormsWriter.java (working copy) @@ -19,11 +19,7 @@ import java.io.IOException; import java.util.Collection; -import java.util.Iterator; -import java.util.HashMap; import java.util.Map; -import java.util.List; -import java.util.ArrayList; import org.apache.lucene.store.IndexOutput; @@ -36,10 +32,6 @@ final class NormsWriter extends InvertedDocEndConsumer { - @Override - public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { - return new NormsWriterPerThread(docInverterPerThread, this); - } @Override public void abort() {} @@ -50,40 +42,11 @@ /** Produce _X.nrm if any document had a field with norms * not disabled */ @Override - public void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException { - - final Map> byField = new HashMap>(); - + public void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { if (!state.fieldInfos.hasNorms()) { return; } - // Typically, each thread will have encountered the same - // field. 
So first we collate by field, ie, all - // per-thread field instances that correspond to the - // same FieldInfo - for (final Map.Entry> entry : threadsAndFields.entrySet()) { - final Collection fields = entry.getValue(); - final Iterator fieldsIt = fields.iterator(); - - while (fieldsIt.hasNext()) { - final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next(); - - if (perField.upto > 0) { - // It has some norms - List l = byField.get(perField.fieldInfo); - if (l == null) { - l = new ArrayList(); - byField.put(perField.fieldInfo, l); - } - l.add(perField); - } else - // Remove this field since we haven't seen it - // since the previous flush - fieldsIt.remove(); - } - } - final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION); IndexOutput normsOut = state.directory.createOutput(normsFileName); @@ -93,60 +56,25 @@ int normCount = 0; for (FieldInfo fi : state.fieldInfos) { - final List toMerge = byField.get(fi); + final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi); int upto = 0; - if (toMerge != null) { - - final int numFields = toMerge.size(); - + if (toWrite != null && toWrite.upto > 0) { normCount++; - final NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; - int[] uptos = new int[numFields]; - - for(int j=0;j 0) { - - assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length); - - int minLoc = 0; - int minDocID = fields[0].docIDs[uptos[0]]; - - for(int j=1;j files) throws IOException { - final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecId, SEED_EXT); + final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecId, SEED_EXT); files.add(seedFileName); SepPostingsReaderImpl.files(segmentInfo, codecId, files); StandardPostingsReader.files(dir, segmentInfo, codecId, files); BlockTermsReader.files(dir, segmentInfo, codecId, files); FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files); - + // hackish! Iterator it = files.iterator(); while(it.hasNext()) { Index: lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (revision 1097270) +++ lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java (working copy) @@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ThrottledIndexOutput; import org.apache.lucene.util._TestUtil; /** @@ -69,6 +70,7 @@ private Set createdFiles; Set openFilesForWrite = new HashSet(); volatile boolean crashed; + private ThrottledIndexOutput throttledOutput; // use this for tracking files for crash. 
// additionally: provides debugging information in case you leave one open @@ -114,6 +116,10 @@ public void setPreventDoubleWrite(boolean value) { preventDoubleWrite = value; } + + public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) { + this.throttledOutput = throttledOutput; + } @Override public synchronized void sync(Collection names) throws IOException { @@ -348,7 +354,7 @@ IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name); openFileHandles.put(io, new RuntimeException("unclosed IndexOutput")); openFilesForWrite.add(name); - return io; + return throttledOutput == null ? io : throttledOutput.newFromDelegate(io); } @Override @@ -578,4 +584,5 @@ maybeYield(); delegate.copy(to, src, dest); } + } Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1097270) +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -116,7 +116,7 @@ * If this is set, it is the only method that should run. */ static final String TEST_METHOD; - + /** Create indexes in this directory, optimally use a subdir, named after the test */ public static final File TEMP_DIR; static { @@ -163,11 +163,11 @@ * multiply it by the number of iterations */ public static final int RANDOM_MULTIPLIER = Integer.parseInt(System.getProperty("tests.multiplier", "1")); - + private int savedBoolMaxClauseCount; private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null; - + /** Used to track if setUp and tearDown are called correctly from subclasses */ private boolean setup; @@ -189,28 +189,28 @@ private static class UncaughtExceptionEntry { public final Thread thread; public final Throwable exception; - + public UncaughtExceptionEntry(Thread thread, Throwable exception) { this.thread = thread; this.exception = exception; } } private List uncaughtExceptions = Collections.synchronizedList(new ArrayList()); - + // saves default codec: we do this statically as many build indexes in @beforeClass private static String savedDefaultCodec; // default codec: not set when we use a per-field provider. private static Codec codec; // default codec provider private static CodecProvider savedCodecProvider; - + private static Locale locale; private static Locale savedLocale; private static TimeZone timeZone; private static TimeZone savedTimeZone; - + private static Map stores; - + private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"}; private static void swapCodec(Codec c, CodecProvider cp) { @@ -288,7 +288,7 @@ // randomly picks from core and test codecs static String pickRandomCodec(Random rnd) { - int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + + int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + TEST_CODECS.length); if (idx < CodecProvider.CORE_CODECS.length) { return CodecProvider.CORE_CODECS[idx]; @@ -321,7 +321,7 @@ /** @deprecated (4.0) until we fix no-fork problems in solr tests */ @Deprecated private static List testClassesRun = new ArrayList(); - + @BeforeClass public static void beforeClassLuceneTestCaseJ4() { staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1; @@ -347,7 +347,7 @@ TimeZone.setDefault(timeZone); testsFailed = false; } - + @AfterClass public static void afterClassLuceneTestCaseJ4() { if (! 
"false".equals(TEST_CLEAN_THREADS)) { @@ -363,12 +363,12 @@ if ("randomPerField".equals(TEST_CODEC)) { if (cp instanceof RandomCodecProvider) codecDescription = cp.toString(); - else + else codecDescription = "PreFlex"; } else { codecDescription = codec.toString(); } - + if (CodecProvider.getDefault() == savedCodecProvider) removeTestCodecs(codec, CodecProvider.getDefault()); CodecProvider.setDefault(savedCodecProvider); @@ -398,14 +398,14 @@ stores = null; // if verbose or tests failed, report some information back if (VERBOSE || testsFailed) - System.err.println("NOTE: test params are: codec=" + codecDescription + - ", locale=" + locale + + System.err.println("NOTE: test params are: codec=" + codecDescription + + ", locale=" + locale + ", timezone=" + (timeZone == null ? "(null)" : timeZone.getID())); if (testsFailed) { System.err.println("NOTE: all tests run in this JVM:"); System.err.println(Arrays.toString(testClassesRun.toArray())); - System.err.println("NOTE: " + System.getProperty("os.name") + " " - + System.getProperty("os.version") + " " + System.err.println("NOTE: " + System.getProperty("os.name") + " " + + System.getProperty("os.version") + " " + System.getProperty("os.arch") + "/" + System.getProperty("java.vendor") + " " + System.getProperty("java.version") + " " @@ -428,7 +428,7 @@ } private static boolean testsFailed; /* true if any tests failed */ - + // This is how we get control when errors occur. // Think of this as start/end/success/failed // events. @@ -463,7 +463,7 @@ LuceneTestCase.this.name = method.getName(); super.starting(method); } - + }; @Before @@ -481,7 +481,7 @@ savedUncaughtExceptionHandler.uncaughtException(t, e); } }); - + savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount(); } @@ -513,7 +513,7 @@ if ("perMethod".equals(TEST_CLEAN_THREADS)) { int rogueThreads = threadCleanup("test method: '" + getName() + "'"); if (rogueThreads > 0) { - System.err.println("RESOURCE LEAK: test method: '" + getName() + System.err.println("RESOURCE LEAK: test method: '" + getName() + "' left " + rogueThreads + " thread(s) running"); // TODO: fail, but print seed for now. if (!testsFailed && uncaughtExceptions.isEmpty()) { @@ -535,18 +535,18 @@ fail("Some threads threw uncaught exceptions!"); } - // calling assertSaneFieldCaches here isn't as useful as having test - // classes call it directly from the scope where the index readers - // are used, because they could be gc'ed just before this tearDown + // calling assertSaneFieldCaches here isn't as useful as having test + // classes call it directly from the scope where the index readers + // are used, because they could be gc'ed just before this tearDown // method is called. // // But it's better then nothing. // - // If you are testing functionality that you know for a fact - // "violates" FieldCache sanity, then you should either explicitly + // If you are testing functionality that you know for a fact + // "violates" FieldCache sanity, then you should either explicitly // call purgeFieldCache at the end of your test method, or refactor - // your Test class so that the inconsistant FieldCache usages are - // isolated in distinct test methods + // your Test class so that the inconsistant FieldCache usages are + // isolated in distinct test methods assertSaneFieldCaches(getTestLabel()); } finally { @@ -557,14 +557,14 @@ private final static int THREAD_STOP_GRACE_MSEC = 50; // jvm-wide list of 'rogue threads' we found, so they only get reported once. 
private final static IdentityHashMap rogueThreads = new IdentityHashMap(); - + static { // just a hack for things like eclipse test-runner threads for (Thread t : Thread.getAllStackTraces().keySet()) { rogueThreads.put(t, true); } } - + /** * Looks for leftover running threads, trying to kill them off, * so they don't fail future tests. @@ -575,20 +575,20 @@ Thread[] stillRunning = new Thread[Thread.activeCount()+1]; int threadCount = 0; int rogueCount = 0; - + if ((threadCount = Thread.enumerate(stillRunning)) > 1) { while (threadCount == stillRunning.length) { // truncated response stillRunning = new Thread[stillRunning.length*2]; threadCount = Thread.enumerate(stillRunning); } - + for (int i = 0; i < threadCount; i++) { Thread t = stillRunning[i]; - - if (t.isAlive() && - !rogueThreads.containsKey(t) && - t != Thread.currentThread() && + + if (t.isAlive() && + !rogueThreads.containsKey(t) && + t != Thread.currentThread() && /* its ok to keep your searcher across test cases */ (t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) { System.err.println("WARNING: " + context + " left thread running: " + t); @@ -613,7 +613,7 @@ } return rogueCount; } - + /** * Asserts that FieldCacheSanityChecker does not detect any * problems with FieldCache.DEFAULT. @@ -656,13 +656,13 @@ } } - + // @deprecated (4.0) These deprecated methods should be removed soon, when all tests using no Epsilon are fixed: @Deprecated static public void assertEquals(double expected, double actual) { assertEquals(null, expected, actual); } - + @Deprecated static public void assertEquals(String message, double expected, double actual) { assertEquals(message, Double.valueOf(expected), Double.valueOf(actual)); @@ -677,18 +677,18 @@ static public void assertEquals(String message, float expected, float actual) { assertEquals(message, Float.valueOf(expected), Float.valueOf(actual)); } - + // Replacement for Assume jUnit class, so we can add a message with explanation: - + private static final class TestIgnoredException extends RuntimeException { TestIgnoredException(String msg) { super(msg); } - + TestIgnoredException(String msg, Throwable t) { super(msg, t); } - + @Override public String getMessage() { StringBuilder sb = new StringBuilder(super.getMessage()); @@ -696,7 +696,7 @@ sb.append(" - ").append(getCause()); return sb.toString(); } - + // only this one is called by our code, exception is not used outside this class: @Override public void printStackTrace(PrintStream s) { @@ -708,25 +708,25 @@ } } } - + public static void assumeTrue(String msg, boolean b) { Assume.assumeNoException(b ? null : new TestIgnoredException(msg)); } - + public static void assumeFalse(String msg, boolean b) { assumeTrue(msg, !b); } - + public static void assumeNoException(String msg, Exception e) { Assume.assumeNoException(e == null ? null : new TestIgnoredException(msg, e)); } - + public static Set asSet(T... args) { return new HashSet(Arrays.asList(args)); } /** - * Convenience method for logging an iterator. + * Convinience method for logging an iterator. * * @param label String logged before/after the items in the iterator * @param iter Each next() is toString()ed and logged on it's own line. If iter is null this is logged differnetly then an empty iterator. @@ -746,7 +746,7 @@ } /** - * Convenience method for logging an array. Wraps the array in an iterator and delegates + * Convinience method for logging an array. 
Wraps the array in an iterator and delegates * * @see #dumpIterator(String,Iterator,PrintStream) */ @@ -760,8 +760,7 @@ public static IndexWriterConfig newIndexWriterConfig(Version v, Analyzer a) { return newIndexWriterConfig(random, v, a); } - - /** create a new index writer config with random defaults using the specified random */ + public static IndexWriterConfig newIndexWriterConfig(Random r, Version v, Analyzer a) { IndexWriterConfig c = new IndexWriterConfig(v, a); if (r.nextBoolean()) { @@ -778,7 +777,7 @@ c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000)); } if (r.nextBoolean()) { - c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20)); + c.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(_TestUtil.nextInt(r, 1, 20))); } if (r.nextBoolean()) { @@ -864,7 +863,7 @@ public static MockDirectoryWrapper newDirectory() throws IOException { return newDirectory(random); } - + /** * Returns a new Directory instance, using the specified random. * See {@link #newDirectory()} for more information. @@ -875,7 +874,7 @@ stores.put(dir, Thread.currentThread().getStackTrace()); return dir; } - + /** * Returns a new Directory instance, with contents copied from the * provided directory. See {@link #newDirectory()} for more @@ -884,23 +883,23 @@ public static MockDirectoryWrapper newDirectory(Directory d) throws IOException { return newDirectory(random, d); } - + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ public static MockDirectoryWrapper newFSDirectory(File f) throws IOException { return newFSDirectory(f, null); } - + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException { String fsdirClass = TEST_DIRECTORY; if (fsdirClass.equals("random")) { fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; } - + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store fsdirClass = "org.apache.lucene.store." + fsdirClass; } - + Class clazz; try { try { @@ -908,11 +907,11 @@ } catch (ClassCastException e) { // TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; - + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store fsdirClass = "org.apache.lucene.store." + fsdirClass; } - + clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class); } MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f, lf)); @@ -922,7 +921,7 @@ throw new RuntimeException(e); } } - + /** * Returns a new Directory instance, using the specified random * with contents copied from the provided directory. See @@ -937,12 +936,14 @@ stores.put(dir, Thread.currentThread().getStackTrace()); return dir; } + /** Returns a new field instance. * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ public static Field newField(String name, String value, Index index) { return newField(random, name, value, index); } + /** Returns a new field instance. * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ @@ -962,62 +963,65 @@ public static Field newField(String name, String value, Store store, Index index, TermVector tv) { return newField(random, name, value, store, index, tv); } + /** Returns a new field instance, using the specified random. 
* See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ public static Field newField(Random random, String name, String value, Index index) { return newField(random, name, value, Store.NO, index); } + /** Returns a new field instance, using the specified random. * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ public static Field newField(Random random, String name, String value, Store store, Index index) { return newField(random, name, value, store, index, TermVector.NO); } + /** Returns a new field instance, using the specified random. * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) { if (!index.isIndexed()) return new Field(name, value, store, index); - + if (!store.isStored() && random.nextBoolean()) store = Store.YES; // randomly store it - + tv = randomTVSetting(random, tv); - + return new Field(name, value, store, index, tv); } - - static final TermVector tvSettings[] = { - TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, - TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS + + static final TermVector tvSettings[] = { + TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, + TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS }; - + private static TermVector randomTVSetting(Random random, TermVector minimum) { switch(minimum) { case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)]; case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)]; - case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS + case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS : TermVector.WITH_POSITIONS_OFFSETS; - case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS + case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS : TermVector.WITH_POSITIONS_OFFSETS; default: return TermVector.WITH_POSITIONS_OFFSETS; } } - + /** return a random Locale from the available locales on the system */ public static Locale randomLocale(Random random) { Locale locales[] = Locale.getAvailableLocales(); return locales[random.nextInt(locales.length)]; } - + /** return a random TimeZone from the available timezones on the system */ public static TimeZone randomTimeZone(Random random) { String tzIds[] = TimeZone.getAvailableIDs(); return TimeZone.getTimeZone(tzIds[random.nextInt(tzIds.length)]); } - + /** return a Locale object equivalent to its programmatic name */ public static Locale localeForName(String localeName) { String elements[] = localeName.split("\\_"); @@ -1039,7 +1043,7 @@ "RAMDirectory", FS_DIRECTORIES[0], FS_DIRECTORIES[1], FS_DIRECTORIES[2] }; - + public static String randomDirectory(Random random) { if (random.nextInt(10) == 0) { return CORE_DIRECTORIES[random.nextInt(CORE_DIRECTORIES.length)]; @@ -1064,7 +1068,7 @@ return FSDirectory.open(file); } } - + static Directory newDirectoryImpl(Random random, String clazzName) { if (clazzName.equals("random")) clazzName = randomDirectory(random); @@ -1085,9 +1089,9 @@ return clazz.newInstance(); } catch (Exception e) { throw new RuntimeException(e); - } + } } - + /** create a new searcher over the reader. * This searcher might randomly use threads. 
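For illustration only, a sketch of how a test might lean on the randomized factories touched in this hunk (newDirectory, newIndexWriterConfig, newField, newSearcher); the test class name is hypothetical and the choice of MockAnalyzer is an assumption:

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

// Illustrative sketch only: a test built on the randomized factory methods from this file.
public class TestRandomizedFactoriesSketch extends LuceneTestCase {
  public void testOneDoc() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(newField("body", "hello world", Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    IndexSearcher searcher = newSearcher(reader);  // may randomly wrap an ExecutorService
    assertEquals(1, searcher.search(new TermQuery(new Term("body", "hello")), 1).totalHits);
    searcher.close();
    reader.close();
    dir.close();
  }
}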
*/ public static IndexSearcher newSearcher(IndexReader r) throws IOException { @@ -1095,8 +1099,8 @@ return new IndexSearcher(r); } else { int threads = 0; - final ExecutorService ex = (random.nextBoolean()) ? null - : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), + final ExecutorService ex = (random.nextBoolean()) ? null + : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), new NamedThreadFactory("LuceneTestCase")); if (ex != null && VERBOSE) { System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads"); @@ -1121,12 +1125,12 @@ public String getName() { return this.name; } - + /** Gets a resource from the classpath as {@link File}. This method should only be used, * if a real file is needed. To get a stream, code should prefer * {@link Class#getResourceAsStream} using {@code this.getClass()}. */ - + protected File getDataFile(String name) throws IOException { try { return new File(this.getClass().getResource(name).toURI()); @@ -1137,11 +1141,11 @@ // We get here from InterceptTestCaseEvents on the 'failed' event.... public void reportAdditionalFailureInfo() { - System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + " -Dtestmethod=" + getName() + " -Dtests.seed=" + new TwoLongs(staticSeed, seed) + reproduceWithExtraParams()); } - + // extra params that were overridden needed to reproduce the command private String reproduceWithExtraParams() { StringBuilder sb = new StringBuilder(); @@ -1157,12 +1161,12 @@ private static long staticSeed; // seed for individual test methods, changed in @before private long seed; - + private static final Random seedRand = new Random(); protected static final Random random = new Random(0); private String name = ""; - + /** * Annotation for tests that should only be run during nightly builds. 
*/ @@ -1170,7 +1174,7 @@ @Inherited @Retention(RetentionPolicy.RUNTIME) public @interface Nightly {} - + /** optionally filters the tests to be run by TEST_METHOD */ public static class LuceneTestCaseRunner extends BlockJUnit4ClassRunner { private List testMethods; @@ -1200,11 +1204,11 @@ testMethods.add(new FrameworkMethod(m)); } } - + if (testMethods.isEmpty()) { throw new RuntimeException("No runnable methods!"); } - + if (TEST_NIGHTLY == false) { if (getTestClass().getJavaClass().isAnnotationPresent(Nightly.class)) { /* the test class is annotated with nightly, remove all methods */ @@ -1265,9 +1269,9 @@ @Override public boolean shouldRun(Description d) { return TEST_METHOD == null || d.getMethodName().equals(TEST_METHOD); - } + } }; - + try { f.apply(this); } catch (NoTestsRemainException e) { @@ -1275,12 +1279,12 @@ } } } - + private static class RandomCodecProvider extends CodecProvider { private List knownCodecs = new ArrayList(); private Map previousMappings = new HashMap(); private final int perFieldSeed; - + RandomCodecProvider(Random random) { this.perFieldSeed = random.nextInt(); register(new StandardCodec()); @@ -1312,13 +1316,13 @@ } return codec.name; } - + @Override - public synchronized String toString() { + public String toString() { return "RandomCodecProvider: " + previousMappings.toString(); } } - + @Ignore("just a hack") public final void alwaysIgnoredTestMethod() {} } Index: lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (revision 1097270) +++ lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (working copy) @@ -79,23 +79,23 @@ } } - /** - * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first + /** + * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first */ public static void unzip(File zipName, File destDir) throws IOException { - + ZipFile zipFile = new ZipFile(zipName); - + Enumeration entries = zipFile.entries(); - + rmDir(destDir); - + destDir.mkdir(); LuceneTestCase.tempDirs.add(destDir.getAbsolutePath()); while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); - + InputStream in = zipFile.getInputStream(entry); File targetFile = new File(destDir, entry.getName()); if (entry.isDirectory()) { @@ -105,24 +105,24 @@ if (targetFile.getParentFile()!=null) { // be on the safe side: do not rely on that directories are always extracted // before their children (although this makes sense, but is it guaranteed?) - targetFile.getParentFile().mkdirs(); + targetFile.getParentFile().mkdirs(); } OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile)); - + byte[] buffer = new byte[8192]; int len; while((len = in.read(buffer)) >= 0) { out.write(buffer, 0, len); } - + in.close(); out.close(); } } - + zipFile.close(); } - + public static void syncConcurrentMerges(IndexWriter writer) { syncConcurrentMerges(writer.getConfig().getMergeScheduler()); } @@ -138,7 +138,7 @@ public static CheckIndex.Status checkIndex(Directory dir) throws IOException { return checkIndex(dir, CodecProvider.getDefault()); } - + /** This runs the CheckIndex tool on the index in. If any * issues are hit, a RuntimeException is thrown; else, * true is returned. 
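Illustration only: the typical way test code uses this helper after all writers on the directory are closed; the helper name below is hypothetical, the checkIndex(Directory) signature is taken from the hunk above:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util._TestUtil;

// Illustrative sketch only: verify an index from test code.
final class CheckIndexSketch {
  static void verify(Directory dir) throws IOException {
    _TestUtil.checkIndex(dir);  // uses CodecProvider.getDefault(); throws RuntimeException if problems are found
  }
}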
*/ @@ -221,62 +221,62 @@ } private static final int[] blockStarts = { - 0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400, - 0x0500, 0x0530, 0x0590, 0x0600, 0x0700, 0x0750, 0x0780, 0x07C0, 0x0800, - 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, 0x0C00, 0x0C80, 0x0D00, - 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x1380, - 0x13A0, 0x1400, 0x1680, 0x16A0, 0x1700, 0x1720, 0x1740, 0x1760, 0x1780, - 0x1800, 0x18B0, 0x1900, 0x1950, 0x1980, 0x19E0, 0x1A00, 0x1A20, 0x1B00, - 0x1B80, 0x1C00, 0x1C50, 0x1CD0, 0x1D00, 0x1D80, 0x1DC0, 0x1E00, 0x1F00, - 0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, 0x2190, 0x2200, 0x2300, - 0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, 0x2600, 0x2700, 0x27C0, - 0x27F0, 0x2800, 0x2900, 0x2980, 0x2A00, 0x2B00, 0x2C00, 0x2C60, 0x2C80, - 0x2D00, 0x2D30, 0x2D80, 0x2DE0, 0x2E00, 0x2E80, 0x2F00, 0x2FF0, 0x3000, - 0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, 0x31A0, 0x31C0, 0x31F0, 0x3200, - 0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, 0xA4D0, 0xA500, 0xA640, - 0xA6A0, 0xA700, 0xA720, 0xA800, 0xA830, 0xA840, 0xA880, 0xA8E0, 0xA900, - 0xA930, 0xA960, 0xA980, 0xAA00, 0xAA60, 0xAA80, 0xABC0, 0xAC00, 0xD7B0, - 0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, 0xFE10, - 0xFE20, 0xFE30, 0xFE50, 0xFE70, 0xFF00, 0xFFF0, - 0x10000, 0x10080, 0x10100, 0x10140, 0x10190, 0x101D0, 0x10280, 0x102A0, - 0x10300, 0x10330, 0x10380, 0x103A0, 0x10400, 0x10450, 0x10480, 0x10800, - 0x10840, 0x10900, 0x10920, 0x10A00, 0x10A60, 0x10B00, 0x10B40, 0x10B60, - 0x10C00, 0x10E60, 0x11080, 0x12000, 0x12400, 0x13000, 0x1D000, 0x1D100, - 0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030, 0x1F100, 0x1F200, + 0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400, + 0x0500, 0x0530, 0x0590, 0x0600, 0x0700, 0x0750, 0x0780, 0x07C0, 0x0800, + 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, 0x0C00, 0x0C80, 0x0D00, + 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x1380, + 0x13A0, 0x1400, 0x1680, 0x16A0, 0x1700, 0x1720, 0x1740, 0x1760, 0x1780, + 0x1800, 0x18B0, 0x1900, 0x1950, 0x1980, 0x19E0, 0x1A00, 0x1A20, 0x1B00, + 0x1B80, 0x1C00, 0x1C50, 0x1CD0, 0x1D00, 0x1D80, 0x1DC0, 0x1E00, 0x1F00, + 0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, 0x2190, 0x2200, 0x2300, + 0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, 0x2600, 0x2700, 0x27C0, + 0x27F0, 0x2800, 0x2900, 0x2980, 0x2A00, 0x2B00, 0x2C00, 0x2C60, 0x2C80, + 0x2D00, 0x2D30, 0x2D80, 0x2DE0, 0x2E00, 0x2E80, 0x2F00, 0x2FF0, 0x3000, + 0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, 0x31A0, 0x31C0, 0x31F0, 0x3200, + 0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, 0xA4D0, 0xA500, 0xA640, + 0xA6A0, 0xA700, 0xA720, 0xA800, 0xA830, 0xA840, 0xA880, 0xA8E0, 0xA900, + 0xA930, 0xA960, 0xA980, 0xAA00, 0xAA60, 0xAA80, 0xABC0, 0xAC00, 0xD7B0, + 0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, 0xFE10, + 0xFE20, 0xFE30, 0xFE50, 0xFE70, 0xFF00, 0xFFF0, + 0x10000, 0x10080, 0x10100, 0x10140, 0x10190, 0x101D0, 0x10280, 0x102A0, + 0x10300, 0x10330, 0x10380, 0x103A0, 0x10400, 0x10450, 0x10480, 0x10800, + 0x10840, 0x10900, 0x10920, 0x10A00, 0x10A60, 0x10B00, 0x10B40, 0x10B60, + 0x10C00, 0x10E60, 0x11080, 0x12000, 0x12400, 0x13000, 0x1D000, 0x1D100, + 0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030, 0x1F100, 0x1F200, 0x20000, 0x2A700, 0x2F800, 0xE0000, 0xE0100, 0xF0000, 0x100000 }; - + private static final int[] blockEnds = { - 0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF, - 0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F, - 0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF, 0x0C7F, 0x0CFF, 
0x0D7F, - 0x0DFF, 0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F, - 0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F, 0x17FF, - 0x18AF, 0x18FF, 0x194F, 0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AAF, 0x1B7F, - 0x1BBF, 0x1C4F, 0x1C7F, 0x1CFF, 0x1D7F, 0x1DBF, 0x1DFF, 0x1EFF, 0x1FFF, - 0x206F, 0x209F, 0x20CF, 0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF, 0x23FF, - 0x243F, 0x245F, 0x24FF, 0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF, - 0x27FF, 0x28FF, 0x297F, 0x29FF, 0x2AFF, 0x2BFF, 0x2C5F, 0x2C7F, 0x2CFF, - 0x2D2F, 0x2D7F, 0x2DDF, 0x2DFF, 0x2E7F, 0x2EFF, 0x2FDF, 0x2FFF, 0x303F, - 0x309F, 0x30FF, 0x312F, 0x318F, 0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF, - 0x33FF, 0x4DBF, 0x4DFF, 0x9FFF, 0xA48F, 0xA4CF, 0xA4FF, 0xA63F, 0xA69F, - 0xA6FF, 0xA71F, 0xA7FF, 0xA82F, 0xA83F, 0xA87F, 0xA8DF, 0xA8FF, 0xA92F, - 0xA95F, 0xA97F, 0xA9DF, 0xAA5F, 0xAA7F, 0xAADF, 0xABFF, 0xD7AF, 0xD7FF, - 0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F, 0xFE1F, - 0xFE2F, 0xFE4F, 0xFE6F, 0xFEFF, 0xFFEF, 0xFFFF, - 0x1007F, 0x100FF, 0x1013F, 0x1018F, 0x101CF, 0x101FF, 0x1029F, 0x102DF, - 0x1032F, 0x1034F, 0x1039F, 0x103DF, 0x1044F, 0x1047F, 0x104AF, 0x1083F, - 0x1085F, 0x1091F, 0x1093F, 0x10A5F, 0x10A7F, 0x10B3F, 0x10B5F, 0x10B7F, - 0x10C4F, 0x10E7F, 0x110CF, 0x123FF, 0x1247F, 0x1342F, 0x1D0FF, 0x1D1FF, - 0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F, 0x1F1FF, 0x1F2FF, + 0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF, + 0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F, + 0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF, 0x0C7F, 0x0CFF, 0x0D7F, + 0x0DFF, 0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F, + 0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F, 0x17FF, + 0x18AF, 0x18FF, 0x194F, 0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AAF, 0x1B7F, + 0x1BBF, 0x1C4F, 0x1C7F, 0x1CFF, 0x1D7F, 0x1DBF, 0x1DFF, 0x1EFF, 0x1FFF, + 0x206F, 0x209F, 0x20CF, 0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF, 0x23FF, + 0x243F, 0x245F, 0x24FF, 0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF, + 0x27FF, 0x28FF, 0x297F, 0x29FF, 0x2AFF, 0x2BFF, 0x2C5F, 0x2C7F, 0x2CFF, + 0x2D2F, 0x2D7F, 0x2DDF, 0x2DFF, 0x2E7F, 0x2EFF, 0x2FDF, 0x2FFF, 0x303F, + 0x309F, 0x30FF, 0x312F, 0x318F, 0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF, + 0x33FF, 0x4DBF, 0x4DFF, 0x9FFF, 0xA48F, 0xA4CF, 0xA4FF, 0xA63F, 0xA69F, + 0xA6FF, 0xA71F, 0xA7FF, 0xA82F, 0xA83F, 0xA87F, 0xA8DF, 0xA8FF, 0xA92F, + 0xA95F, 0xA97F, 0xA9DF, 0xAA5F, 0xAA7F, 0xAADF, 0xABFF, 0xD7AF, 0xD7FF, + 0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F, 0xFE1F, + 0xFE2F, 0xFE4F, 0xFE6F, 0xFEFF, 0xFFEF, 0xFFFF, + 0x1007F, 0x100FF, 0x1013F, 0x1018F, 0x101CF, 0x101FF, 0x1029F, 0x102DF, + 0x1032F, 0x1034F, 0x1039F, 0x103DF, 0x1044F, 0x1047F, 0x104AF, 0x1083F, + 0x1085F, 0x1091F, 0x1093F, 0x10A5F, 0x10A7F, 0x10B3F, 0x10B5F, 0x10B7F, + 0x10C4F, 0x10E7F, 0x110CF, 0x123FF, 0x1247F, 0x1342F, 0x1D0FF, 0x1D1FF, + 0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F, 0x1F1FF, 0x1F2FF, 0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF }; - + /** Returns random string, all codepoints within the same unicode block. */ public static String randomRealisticUnicodeString(Random r) { return randomRealisticUnicodeString(r, 20); } - + /** Returns random string, all codepoints within the same unicode block. 
*/ public static String randomRealisticUnicodeString(Random r, int maxLength) { final int end = r.nextInt(maxLength); Property changes on: lucene\contrib\lucli\build.xml ___________________________________________________________________ Added: svn:mergeinfo Merged /lucene/dev/branches/branch_3x/lucene/contrib/lucli/build.xml:r1090423,1091501 Merged /lucene/dev/branches/realtime_search/lucene/contrib/lucli/build.xml:r953476-1097271 Merged /lucene/solr/branches/newtrunk/lucene/contrib/lucli/build.xml:r924462-924482 Property changes on: README.txt ___________________________________________________________________ Added: svn:mergeinfo Merged /lucene/dev/branches/branch_3x/README.txt:r949730,957490,961612,979161,980654,982195,987811,988512,1025544,1026614,1034080,1039151,1050654,1056762,1060014,1060438,1060784,1061035,1063880,1063934,1065324,1066771,1067699,1067764,1070818,1075044,1079376,1079381,1080071,1081052,1083240,1083534,1083726,1083789,1083812,1085811,1090423,1091501,1092373,1095519 Merged /lucene/dev/branches/lucene_solr_3_1/README.txt:r1081856,1083239,1085499,1085511,1085532,1085809 Merged /lucene/dev/branches/bulkpostings/README.txt:r1069647,1069925 Merged /lucene/dev/branches/preflexfixes/README.txt:r967125-979432 Index: dev-tools/idea/solr/solr.iml =================================================================== --- dev-tools/idea/solr/solr.iml (revision 1097270) +++ dev-tools/idea/solr/solr.iml (working copy) @@ -10,6 +10,7 @@ + Index: dev-tools/idea/modules/analysis/common/common.iml =================================================================== --- dev-tools/idea/modules/analysis/common/common.iml (revision 1097270) +++ dev-tools/idea/modules/analysis/common/common.iml (working copy) @@ -7,7 +7,6 @@ - Index: dev-tools/idea/lucene/contrib/queryparser/queryparser.iml =================================================================== --- dev-tools/idea/lucene/contrib/queryparser/queryparser.iml (revision 1097270) +++ dev-tools/idea/lucene/contrib/queryparser/queryparser.iml (working copy) @@ -6,7 +6,6 @@ - Index: dev-tools/idea/lucene/contrib/ant/ant.iml =================================================================== --- dev-tools/idea/lucene/contrib/ant/ant.iml (revision 1097270) +++ dev-tools/idea/lucene/contrib/ant/ant.iml (working copy) @@ -6,7 +6,6 @@ - Index: dev-tools/idea/lucene/lucene.iml =================================================================== --- dev-tools/idea/lucene/lucene.iml (revision 1097270) +++ dev-tools/idea/lucene/lucene.iml (working copy) @@ -6,7 +6,6 @@ -