Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(révision 1431123)
+++ lucene/CHANGES.txt	(copie de travail)
@@ -106,7 +106,11 @@
 
 * LUCENE-4659: Massive cleanup to CategoryPath API. Additionally, CategoryPath is 
   now immutable, so you don't need to clone() it. (Shai Erera)
-  
+
+* LUCENE-4670: StoredFieldsWriter and TermVectorsWriter have new finish* callbacks
+  which are called after a doc/field/term has been completely added.
+  (Adrien Grand, Robert Muir)
+
 New Features
 
 * LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java	(révision 1431123)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java	(copie de travail)
@@ -136,19 +136,8 @@
     }
   }
 
-  private void endWithPreviousDocument() throws IOException {
-    if (numBufferedDocs > 0) {
-      endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
-    }
-  }
-
   @Override
   public void startDocument(int numStoredFields) throws IOException {
-    endWithPreviousDocument();
-    if (triggerFlush()) {
-      flush();
-    }
-
     if (numBufferedDocs == this.numStoredFields.length) {
       final int newLength = ArrayUtil.oversize(numBufferedDocs + 1, 4);
       this.numStoredFields = Arrays.copyOf(this.numStoredFields, newLength);
@@ -158,6 +147,14 @@
     ++numBufferedDocs;
   }
 
+  @Override
+  public void finishDocument() throws IOException {
+    endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
+    if (triggerFlush()) {
+      flush();
+    }
+  }
+
   private static void saveInts(int[] values, int length, DataOutput out) throws IOException {
     assert length > 0;
     if (length == 1) {
@@ -295,9 +292,10 @@
 
   @Override
   public void finish(FieldInfos fis, int numDocs) throws IOException {
-    endWithPreviousDocument();
     if (numBufferedDocs > 0) {
       flush();
+    } else {
+      assert bufferedDocs.length == 0;
     }
     if (docBase != numDocs) {
       throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
@@ -351,17 +349,13 @@
             }
 
             if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
-                && (numBufferedDocs == 0 || triggerFlush()) // starting a new chunk
+                && numBufferedDocs == 0 // starting a new chunk
                 && startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
                 && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
                 && nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
               assert docID == it.docBase;
 
               // no need to decompress, just copy data
-              endWithPreviousDocument();
-              if (triggerFlush()) {
-                flush();
-              }
               indexWriter.writeIndex(it.chunkDocs, fieldsStream.getFilePointer());
               writeHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
               it.copyCompressedData(fieldsStream);
@@ -380,6 +374,7 @@
                 final int diff = docID - it.docBase;
                 startDocument(it.numStoredFields[diff]);
                 bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
+                finishDocument();
                 ++docCount;
                 mergeState.checkAbort.work(300);
               }
Index: lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java	(révision 1431123)
+++ lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java	(copie de travail)
@@ -71,18 +71,27 @@
    *  has no vector fields, in this case <code>numVectorFields</code> 
    *  will be zero. */
   public abstract void startDocument(int numVectorFields) throws IOException;
-  
+
+  /** Called after a doc and all its fields have been added. */
+  public void finishDocument() throws IOException {};
+
   /** Called before writing the terms of the field.
    *  {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
   public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException;
-  
+
+  /** Called after a field and all its terms have been added. */
+  public void finishField() throws IOException {};
+
   /** Adds a term and its term frequency <code>freq</code>.
    * If this field has positions and/or offsets enabled, then
    * {@link #addPosition(int, int, int, BytesRef)} will be called 
    * <code>freq</code> times respectively.
    */
   public abstract void startTerm(BytesRef term, int freq) throws IOException;
-  
+
+  /** Called after a term and all its positions have been added. */
+  public void finishTerm() throws IOException {}
+
   /** Adds a term position and offsets */
   public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException;
   
@@ -97,7 +106,7 @@
    *  check that this is the case to detect the JRE bug described 
    *  in LUCENE-1282. */
   public abstract void finish(FieldInfos fis, int numDocs) throws IOException;
-  
+
   /** 
    * Called by IndexWriter when writing new segments.
    * <p>
@@ -197,6 +206,7 @@
   protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
     if (vectors == null) {
       startDocument(0);
+      finishDocument();
       return;
     }
 
@@ -275,10 +285,13 @@
             addPosition(pos, startOffset, endOffset, payload);
           }
         }
+        finishTerm();
       }
       assert termCount == numTerms;
+      finishField();
     }
     assert fieldCount == numFields;
+    finishDocument();
   }
   
   /** Return the BytesRef Comparator used to sort terms
Index: lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java	(révision 1431123)
+++ lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java	(copie de travail)
@@ -55,7 +55,10 @@
    *  called even if the document has no stored fields, in
    *  this case <code>numStoredFields</code> will be zero. */
   public abstract void startDocument(int numStoredFields) throws IOException;
-  
+
+  /** Called when a document and all its fields have been added. */
+  public void finishDocument() throws IOException {}
+
   /** Writes a single stored field. */
   public abstract void writeField(FieldInfo info, StorableField field) throws IOException;
 
@@ -116,6 +119,8 @@
     for (StorableField field : doc) {
       writeField(fieldInfos.fieldInfo(field.name()), field);
     }
+
+    finishDocument();
   }
 
   @Override
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java	(révision 1431123)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java	(copie de travail)
@@ -124,17 +124,16 @@
     if (payloads)
       bits |= Lucene40TermVectorsReader.STORE_PAYLOAD_WITH_TERMVECTOR;
     tvf.writeByte(bits);
-    
-    assert fieldCount <= numVectorFields;
-    if (fieldCount == numVectorFields) {
-      // last field of the document
-      // this is crazy because the file format is crazy!
-      for (int i = 1; i < fieldCount; i++) {
-        tvd.writeVLong(fps[i] - fps[i-1]);
-      }
+  }
+  
+  @Override
+  public void finishDocument() throws IOException {
+    assert fieldCount == numVectorFields;
+    for (int i = 1; i < fieldCount; i++) {
+      tvd.writeVLong(fps[i] - fps[i-1]);
     }
   }
-  
+
   private final BytesRef lastTerm = new BytesRef(10);
 
   // NOTE: we override addProx, so we don't need to buffer when indexing.
@@ -222,20 +221,6 @@
       }
       
       bufferedIndex++;
-      
-      // dump buffer if we are done
-      if (bufferedIndex == bufferedFreq) {
-        if (payloads) {
-          tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
-        }
-        for (int i = 0; i < bufferedIndex; i++) {
-          if (offsets) {
-            tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
-            tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
-            lastOffset = offsetEndBuffer[i];
-          }
-        }
-      }
     } else if (positions) {
       // write position delta
       writePosition(position - lastPosition, payload);
@@ -248,6 +233,25 @@
     }
   }
   
+  @Override
+  public void finishTerm() throws IOException {
+    if (bufferedIndex > 0) {
+      // dump buffer
+      assert positions && (offsets || payloads);
+      assert bufferedIndex == bufferedFreq;
+      if (payloads) {
+        tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
+      }
+      for (int i = 0; i < bufferedIndex; i++) {
+        if (offsets) {
+          tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
+          tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
+          lastOffset = offsetEndBuffer[i];
+        }
+      }
+    }
+  }
+
   private void writePosition(int delta, BytesRef payload) throws IOException {
     if (payloads) {
       int payloadLength = payload == null ? 0 : payload.length;
Index: lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java	(révision 1431123)
+++ lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java	(copie de travail)
@@ -105,6 +105,7 @@
     while(lastDocID < docID) {
       fieldsWriter.startDocument(0);
       lastDocID++;
+      fieldsWriter.finishDocument();
     }
   }
 
@@ -119,6 +120,7 @@
       for (int i = 0; i < numStoredFields; i++) {
         fieldsWriter.writeField(fieldInfos[i], storedFields[i]);
       }
+      fieldsWriter.finishDocument();
       lastDocID++;
     }
 
Index: lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java	(révision 1431123)
+++ lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java	(copie de travail)
@@ -78,6 +78,7 @@
   void fill(int docID) throws IOException {
     while(lastDocID < docID) {
       writer.startDocument(0);
+      writer.finishDocument();
       lastDocID++;
     }
   }
@@ -108,6 +109,7 @@
     for (int i = 0; i < numVectorFields; i++) {
       perFields[i].finishDocument();
     }
+    writer.finishDocument();
 
     assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
 
Index: lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java	(révision 1431123)
+++ lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java	(copie de travail)
@@ -182,7 +182,9 @@
         }
         tv.addProx(freq, posReader, offReader);
       }
+      tv.finishTerm();
     }
+    tv.finishField();
 
     termsHashPerField.reset();
 
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java	(révision 1431123)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java	(copie de travail)
@@ -18,17 +18,20 @@
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.codecs.TermVectorsWriter;
 import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
 import org.apache.lucene.index.AssertingAtomicReader;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * Just like {@link Lucene40TermVectorsFormat} but with additional asserts.
@@ -43,7 +46,7 @@
 
   @Override
   public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
-    return in.vectorsWriter(directory, segmentInfo, context);
+    return new AssertingTermVectorsWriter(in.vectorsWriter(directory, segmentInfo, context));
   }
   
   static class AssertingTermVectorsReader extends TermVectorsReader {
@@ -68,5 +71,117 @@
     public TermVectorsReader clone() {
       return new AssertingTermVectorsReader(in.clone());
     }
-  }  
+  }
+
+  enum Status {
+    UNDEFINED, STARTED, FINISHED;
+  }
+
+  static class AssertingTermVectorsWriter extends TermVectorsWriter {
+    private final TermVectorsWriter in;
+    private Status docStatus, fieldStatus, termStatus;
+    private int fieldCount, termCount, positionCount;
+    boolean hasPositions;
+
+    AssertingTermVectorsWriter(TermVectorsWriter in) {
+      this.in = in;
+      docStatus = Status.UNDEFINED;
+      fieldStatus = Status.UNDEFINED;
+      termStatus = Status.UNDEFINED;
+      fieldCount = termCount = positionCount = 0;
+    }
+
+    @Override
+    public void startDocument(int numVectorFields) throws IOException {
+      assert fieldCount == 0;
+      assert docStatus != Status.STARTED;
+      in.startDocument(numVectorFields);
+      docStatus = Status.STARTED;
+      fieldCount = numVectorFields;
+    }
+
+    @Override
+    public void finishDocument() throws IOException {
+      assert fieldCount == 0;
+      assert docStatus == Status.STARTED;
+      in.finishDocument();
+      docStatus = Status.FINISHED;
+    }
+
+    @Override
+    public void startField(FieldInfo info, int numTerms, boolean positions,
+        boolean offsets, boolean payloads) throws IOException {
+      assert termCount == 0;
+      assert docStatus == Status.STARTED;
+      assert fieldStatus != Status.STARTED;
+      in.startField(info, numTerms, positions, offsets, payloads);
+      fieldStatus = Status.STARTED;
+      termCount = numTerms;
+      hasPositions = positions || offsets || payloads;
+    }
+
+    @Override
+    public void finishField() throws IOException {
+      assert termCount == 0;
+      assert fieldStatus == Status.STARTED;
+      in.finishField();
+      fieldStatus = Status.FINISHED;
+      --fieldCount;
+    }
+
+    @Override
+    public void startTerm(BytesRef term, int freq) throws IOException {
+      assert docStatus == Status.STARTED;
+      assert fieldStatus == Status.STARTED;
+      assert termStatus != Status.STARTED;
+      in.startTerm(term, freq);
+      termStatus = Status.STARTED;
+      positionCount = hasPositions ? freq : 0;
+    }
+
+    @Override
+    public void finishTerm() throws IOException {
+      assert positionCount == 0;
+      assert docStatus == Status.STARTED;
+      assert fieldStatus == Status.STARTED;
+      assert termStatus == Status.STARTED;
+      in.finishTerm();
+      termStatus = Status.FINISHED;
+      --termCount;
+    }
+
+    @Override
+    public void addPosition(int position, int startOffset, int endOffset,
+        BytesRef payload) throws IOException {
+      assert docStatus == Status.STARTED;
+      assert fieldStatus == Status.STARTED;
+      assert termStatus == Status.STARTED;
+      in.addPosition(position, startOffset, endOffset, payload);
+      --positionCount;
+    }
+
+    @Override
+    public void abort() {
+      in.abort();
+    }
+
+    @Override
+    public void finish(FieldInfos fis, int numDocs) throws IOException {
+      assert docStatus != Status.STARTED;
+      assert fieldStatus != Status.STARTED;
+      assert termStatus != Status.STARTED; 
+      in.finish(fis, numDocs);
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() throws IOException {
+      return in.getComparator();
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+
+  }
 }
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java	(révision 1431123)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java	(copie de travail)
@@ -91,6 +91,12 @@
     }
 
     @Override
+    public void finishDocument() throws IOException {
+      assert fieldCount == 0;
+      in.finishDocument();
+    }
+
+    @Override
     public void writeField(FieldInfo info, StorableField field) throws IOException {
       in.writeField(info, field);
       assert fieldCount > 0;
