Index: lucene/core/src/test/org/apache/lucene/index/TestExternalSegmentData.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestExternalSegmentData.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/index/TestExternalSegmentData.java (working copy) @@ -0,0 +1,80 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.externaldoclength.ExternalDocLengthReader; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestExternalSegmentData extends LuceneTestCase { + + public void test() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + TieredMergePolicy tmp = new TieredMergePolicy(); + // We could allow true if we fixed our "reader" to open + // CFS when necessary: + tmp.setUseCompoundFile(false); + iwc.setMergePolicy(tmp); + iwc.setCodec(Codec.forName("ExternalDocLengthCodec")); + + // nocommit add threads here: + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + int numDocs = atLeast(1000); + for(int i=0;i fieldsReaderLocal = new CloseableThreadLocal() { @Override @@ -120,6 +123,13 @@ termVectorsReaderOrig = null; } + ExternalDataFormat edf = si.info.getCodec().externalDataFormat(); + if (edf != null) { + edr = edf.getReader(segmentReadState); + } else { + edr = null; + } + success = true; } finally { if (!success) { @@ -142,12 +152,12 @@ //System.out.println("core.decRef seg=" + owner.getSegmentInfo() + " rc=" + ref); if (ref.decrementAndGet() == 0) { IOUtils.close(termVectorsLocal, fieldsReaderLocal, fields, perDocProducer, - termVectorsReaderOrig, fieldsReaderOrig, cfsReader, norms); + termVectorsReaderOrig, fieldsReaderOrig, cfsReader, norms, edr); notifyCoreClosedListeners(); } } - private final void notifyCoreClosedListeners() { + private void notifyCoreClosedListeners() { synchronized(coreClosedListeners) { for (CoreClosedListener listener : coreClosedListeners) { listener.onClose(owner); @@ -167,4 
+177,8 @@ public String toString() { return "SegmentCoreReader(owner=" + owner + ")"; } + /** Returns the reader assigned to edr when this core was opened; null when the codec has no ExternalDataFormat. */ + public ExternalDataReader getExternalDataReader() { + return edr; + } } Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1429576) +++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -24,6 +24,7 @@ import java.util.Map; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.ExternalDataFormat; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.PerDocConsumer; @@ -156,6 +157,11 @@ FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat().getFieldInfosWriter(); fieldInfosWriter.write(directory, mergeState.segmentInfo.name, mergeState.fieldInfos, context); + ExternalDataFormat edf = codec.externalDataFormat(); + if (edf != null) { + edf.getWriter(mergeState.segmentInfo.name).merge(segmentWriteState, mergeState); + } + return mergeState; } Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1429576) +++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -281,4 +281,10 @@ ensureOpen(); core.removeCoreClosedListener(listener); } + /** Returns the external data reader held by this segment's core readers (may be null); fails like the other accessors if this reader is closed. */ + @Override + public Object getExternalDataReader() { + ensureOpen(); + return core.getExternalDataReader(); + } } Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 1429576) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (working copy) @@ -25,6 +25,8 @@ import 
org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.ExternalDataFormat; +import org.apache.lucene.codecs.ExternalDataWriter; import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; @@ -189,6 +191,8 @@ final Allocator byteBlockAllocator; final IntBlockPool.Allocator intBlockAllocator; + /** External data format taken from the codec in the constructor; null-checked before use. */ private ExternalDataFormat edf; + /** Writer for the current segment, (re)assigned in initSegmentInfo; null when edf is null. */ private ExternalDataWriter edw; public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent, FieldInfos.Builder fieldInfos, IndexingChain indexingChain) { @@ -201,6 +205,7 @@ this.codec = parent.codec; this.docState = new DocState(this, infoStream); this.docState.similarity = parent.indexWriter.getConfig().getSimilarity(); + edf = codec.externalDataFormat(); bytesUsed = Counter.newCounter(); byteBlockAllocator = new DirectTrackingAllocator(bytesUsed); pendingDeletes = new BufferedDeletes(); @@ -248,6 +253,10 @@ try { try { consumer.processDocument(fieldInfos); + // nocommit move into consumer? 
+ if (edw != null) { + edw.addDocument(docState.docID, doc); + } } finally { docState.clear(); } @@ -275,7 +284,7 @@ finishDocument(delTerm); } - private void initSegmentInfo() { + private void initSegmentInfo() throws IOException { String segment = writer.newSegmentName(); segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segment, -1, false, codec, null, null); @@ -283,6 +292,11 @@ if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segment + " delQueue=" + deleteQueue); } + if (edf != null) { + edw = edf.getWriter(segmentInfo.name); + } else { + edw = null; + } } public int updateDocuments(Iterable docs, Analyzer analyzer, Term delTerm) throws IOException { @@ -307,6 +321,10 @@ boolean success = false; try { consumer.processDocument(fieldInfos); + // nocommit move into consumer? (guard on edw, the reference dereferenced below, as updateDocument does) + if (edw != null) { + edw.addDocument(docState.docID, doc); + } success = true; } finally { if (!success) { @@ -486,6 +504,10 @@ try { consumer.flush(flushState); + if (edw != null) { + edw.flush(flushState); + } + pendingDeletes.terms.clear(); segmentInfo.setFiles(new HashSet(directory.getCreatedFiles())); Index: lucene/core/src/java/org/apache/lucene/index/AtomicReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (revision 1429576) +++ lucene/core/src/java/org/apache/lucene/index/AtomicReader.java (working copy) @@ -188,4 +188,9 @@ * synchronization. 
*/ public abstract Bits getLiveDocs(); + /** Returns this reader's external data reader. This base implementation only checks that the reader is still open and then returns null; subclasses override it to return a real reader. */ + public Object getExternalDataReader() { + ensureOpen(); + return null; + } } Index: lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec =================================================================== --- lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (revision 1429576) +++ lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (working copy) @@ -18,3 +18,5 @@ org.apache.lucene.codecs.compressing.FastDecompressionCompressingCodec org.apache.lucene.codecs.compressing.HighCompressionCompressingCodec org.apache.lucene.codecs.compressing.DummyCompressingCodec +org.apache.lucene.codecs.externaldoclength.ExternalDocLengthCodec + Index: lucene/test-framework/src/java/org/apache/lucene/codecs/externaldoclength/ExternalDocLengthReader.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/externaldoclength/ExternalDocLengthReader.java (revision 0) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/externaldoclength/ExternalDocLengthReader.java (working copy) @@ -0,0 +1,51 @@ +package org.apache.lucene.codecs.externaldoclength; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.ExternalDataReader; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.store.IndexInput; + +public class ExternalDocLengthReader extends ExternalDataReader { + private final int[] docFieldCounts; + + public ExternalDocLengthReader(SegmentReadState state) throws IOException { + final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, "", "fc"); + + docFieldCounts = new int[state.segmentInfo.getDocCount()]; + IndexInput in = state.dir.openInput(fileName, state.context); + try { + for(int docID=0;docID fieldCounts = new ArrayList(); + + @Override + public void addDocument(int docID, IndexDocument doc) { + // Skip any missing (deleted due to exc during + // indexing) docs: + while(fieldCounts.size() < docID) { + fieldCounts.add(0); + } + int count = 0; + for(StorableField f : doc.storableFields()) { + if (f.fieldType().stored()) { + // RIW randomly adds not-stored DocValues + // fields, so we have to not count those + count++; + } + } + if (LuceneTestCase.VERBOSE) { + System.out.println("FC: add docID=" + docID + " count=" + count); + } + fieldCounts.add(count); + } + + /** Pads fieldCounts with 0 up to the segment's doc count, then writes one vInt per doc to the segment's "fc" file. */ + @Override + public void flush(SegmentWriteState state) throws IOException { + if (LuceneTestCase.VERBOSE) { + System.out.println("FC: flush seg=" + state.segmentInfo.name + " dir=" + state.directory); + } + while(fieldCounts.size() < state.segmentInfo.getDocCount()) { + fieldCounts.add(0); + } + String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, "", "fc"); + IndexOutput o = state.directory.createOutput(fileName, IOContext.DEFAULT); + try { + for(int size : fieldCounts) { + if (LuceneTestCase.VERBOSE) { + System.out.println(" write " + size); + } + o.writeVInt(size); + } + } finally { + // Always release the output, even if a write fails: + o.close(); + } + } + + @Override + public void 
merge(SegmentWriteState state, MergeState mergeState) throws IOException { + String mergedFileName = IndexFileNames.segmentFileName(mergeState.segmentInfo.name, "", "fc"); + if (LuceneTestCase.VERBOSE) { + System.out.println("FC: merge seg=" + mergeState.segmentInfo.name); + } + IndexOutput o = state.directory.createOutput(mergedFileName, IOContext.DEFAULT); + + try { + for(AtomicReader r : mergeState.readers) { + // NOTE: will not work if you eg addIndices(SlowCompositeReader...) + String fileName = IndexFileNames.segmentFileName(((SegmentReader) r).getSegmentName(), "", "fc"); + IndexInput i = state.directory.openInput(fileName, IOContext.DEFAULT); + try { + Bits liveDocs = r.getLiveDocs(); + for(int docID=0;docID