Index: src/java/org/apache/lucene/index/IndexDeletionPolicy.java =================================================================== --- src/java/org/apache/lucene/index/IndexDeletionPolicy.java (revision 518544) +++ src/java/org/apache/lucene/index/IndexDeletionPolicy.java (working copy) @@ -21,10 +21,13 @@ import java.io.IOException; /** - *
Expert: implement this interface, and pass it to one + *
Expert: policy for deletion of stale {@link IndexCommitPoint index commits}. + * + *
Implement this interface, and pass it to one * of the {@link IndexWriter} or {@link IndexReader} - * constructors, to customize when "point in time" commits - * are deleted from an index. The default deletion policy + * constructors, to customize when older + * {@link IndexCommitPoint point-in-time commits} + * are deleted from the index directory. The default deletion policy * is {@link KeepOnlyLastCommitDeletionPolicy}, which always * removes old commits as soon as a new commit is done (this * matches the behavior before 2.2).
@@ -52,31 +55,36 @@ * instantiated to give the policy a chance to remove old * commit points. * - *The writer locates all commits present in the index - * and calls this method. The policy may choose to delete - * commit points. To delete a commit point, call the - * {@link IndexCommitPoint#delete} method.
+ *The writer locates all index commits present in the + * index directory and calls this method. The policy may + * choose to delete some of the commit points, doing so by + * calling method {@link IndexCommitPoint#delete delete()} + * of {@link IndexCommitPoint}.
* - * @param commits List of {@link IndexCommitPoint}, + * @param commits List of current + * {@link IndexCommitPoint point-in-time commits}, * sorted by age (the 0th one is the oldest commit). */ public void onInit(List commits) throws IOException; /** - *This is called each time the writer commits. This - * gives the policy a chance to remove old commit points + *
This is called each time the writer completes a commit. + * This gives the policy a chance to remove old commit points + * with each commit.
* + *The policy may now choose to delete old commit points + * by calling method {@link IndexCommitPoint#delete delete()} + * of {@link IndexCommitPoint}.
+ * *If writer has autoCommit = true then
* this method will in general be called many times during
* one instance of {@link IndexWriter}. If
* autoCommit = false then this method is
* only called once when {@link IndexWriter#close} is
* called, or not at all if the {@link IndexWriter#abort}
- * is called. The policy may now choose to delete old
- * commit points by calling {@link IndexCommitPoint#delete}.
+ * is called.
*
- * @param commits List of {@link IndexCommitPoint}>,
+ * @param commits List of {@link IndexCommitPoint},
* sorted by age (the 0th one is the oldest commit).
*/
public void onCommit(List commits) throws IOException;
Index: src/java/org/apache/lucene/index/IndexCommitPoint.java
===================================================================
--- src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 518544)
+++ src/java/org/apache/lucene/index/IndexCommitPoint.java (working copy)
@@ -18,24 +18,37 @@
*/
/**
- * Represents a single commit into an index as seen by the
- * {@link IndexDeletionPolicy}.
+ *
Expert: represents a single commit into an index as seen by the + * {@link IndexDeletionPolicy}. + *
+ * Changes to the content of an index are made visible only + * after the writer who made that change has written to the + * directory a new segments file (segments_N). This point in + * time, when the action of writing of a new segments file to the + * directory is completed, is therefore an index-commit-point. + *
+ * Each index-commit-point has a unique segments file associated
+ * with it. The segments file associated with a later
+ * index-commit-point would have a larger N.
*/
public interface IndexCommitPoint {
/**
- * Get the segments file (ie, segments_N) of
- * this commit point.
+ * Get the segments file (segments_N) associated
+ * with this commit point.
*/
public String getSegmentsFileName();
/**
- * Notify the writer that this commit point should be
- * deleted. This should only be called by the {@link
- * IndexDeletionPolicy} during its {@link
- * IndexDeletionPolicy#onInit} or {@link
- * IndexDeletionPolicy#onCommit} method.
+ * Delete this commit point.
+ *
+ * Upon calling this, the writer is notified that this commit + * point should be deleted. + *
+ * Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect + * and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or + * {@link IndexDeletionPolicy#onCommit onCommit()} methods. */ public void delete(); } Index: src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNames.java (revision 518544) +++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy) @@ -44,8 +44,8 @@ /** Extension of deletes */ static final String DELETES_EXTENSION = "del"; - /** Extension of single norms */ - static final String SINGLE_NORMS_EXTENSION = "f"; + /** Extension of plain norms */ + static final String PLAIN_NORMS_EXTENSION = "f"; /** Extension of separate norms */ static final String SEPARATE_NORMS_EXTENSION = "s"; @@ -91,9 +91,9 @@ * @param gen -- generation */ static final String fileNameFromGeneration(String base, String extension, long gen) { - if (gen == -1) { + if (gen == SegmentInfo.NO) { return null; - } else if (gen == 0) { + } else if (gen == SegmentInfo.WITHOUT_GEN) { return base + extension; } else { return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension; Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 518544) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -126,6 +126,30 @@ normally relies on.
*/ +/* + * Clarification: Check Points (and commits) + * Being able to set autoCommit=false allows IndexWriter to flush and + * write new index files to the directory without writing a new Segments + * file which references these new files. It also means that the state of + * the in memory SegmentInfos object is different than the most recent + * onDir Segments file. + * + * Each time the SegmentInfos is changed, and matches the (possibly + * modified) directory files, we have a new "check point". + * If the modified/new SegmentInfos is written to disk - as a new + * (generation of) Segments file - this check point is also an + * IndexCommitPoint. + * + * With autoCommit=true, every checkPoint is also a CommitPoint. + * With autoCommit=false, some checkPoints may not be commits. + * + * A new checkpoint always replaces the previous checkpoint and + * becomes the new "front" of the index. This allows the IndexFileDeleter + * to delete files that are referenced only by stale checkpoints. + * (files that were created since the last commit, but are no longer + * referenced by the "front" of the index). For this, IndexFileDeleter + * keeps track of the last non commit checkpoint. + */ public class IndexWriter { /** @@ -1655,7 +1679,9 @@ /** * Flush all in-memory buffered updates (adds and deletes) - * to the Directory. + * to the Directory. + *Note: if autocommit=false, flushed data would still
+ * not be visible to readers.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
Index: src/java/org/apache/lucene/index/IndexFileDeleter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 518544)
+++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy)
@@ -33,20 +33,30 @@
/*
* This class keeps track of each SegmentInfos instance that
- * is still "live", either because it corresponds to a
- * segments_N in the Directory (a real commit) or because
- * it's the in-memory SegmentInfos that a writer is actively
- * updating but has not yet committed (currently this only
- * applies when autoCommit=false in IndexWriter). This
- * class uses simple reference counting to map the live
- * SegmentInfos instances to individual files in the
- * Directory.
+ * is still "live", either because it corresponds to a
+ * segments_N file in the Directory (a "commit", i.e. a
+ * committed SegmentInfos) or because it's the in-memory SegmentInfos
+ * that a writer is actively updating but has not yet committed
+ * (currently this only applies when autoCommit=false in IndexWriter).
+ * This class uses simple reference counting to map the live
+ * SegmentInfos instances to individual files in the Directory.
+ *
+ * Clarification 1: the same directory file may be
+ * referenced by more than one IndexCommitPoint, i.e. more
+ * than one SegmentInfos. Therefore we count how many commits
+ * reference each file. When all the commits referencing
+ * a certain file have been deleted, the refcount for that
+ * file would drop to zero, and the file would be deleted.
*
* A separate deletion policy interface
* (IndexDeletionPolicy) is consulted on creation (onInit)
* and once per commit (onCommit), to decide when a commit
* should be removed.
*
+ * Clarification 2: It is the business of the IndexDeletionPolicy
+ * to delete commit points. The actual file deletions derived
+ * from deleting commit points are the business of the IndexFileDeleter.
+ *
* The current default deletion policy is {@link
* KeepOnlyLastCommitDeletionPolicy}, which removes all
* prior commits when a new commit has completed. This
@@ -64,8 +74,9 @@
* so we will retry them again later: */
private List deletable;
- /* Reference count for all files in the index. Maps
- * String to RefCount (class below) instances: */
+ /* Reference count for all files in the index.
+ * Counts how many existing commits reference a file.
+ * Maps String to RefCount (class below) instances: */
private Map refCounts = new HashMap();
/* Holds all commits (segments_N) currently in the index.
@@ -79,8 +90,10 @@
* non-commit checkpoint: */
private List lastFiles = new ArrayList();
+ /* Commits that the IndexDeletionPolicy has decided to delete: */
+ private List commitsToDelete = new ArrayList();
+
private PrintStream infoStream;
- private List toDelete = new ArrayList();
private Directory directory;
private IndexDeletionPolicy policy;
@@ -188,19 +201,19 @@
}
/**
- * Remove the CommitPoints in the toDelete List by
+ * Remove the CommitPoints in the commitsToDelete List by
* DecRef'ing all files from each SegmentInfos.
*/
private void deleteCommits() throws IOException {
- int size = toDelete.size();
+ int size = commitsToDelete.size();
if (size > 0) {
// First decref all files that had been referred to by
// the now-deleted commits:
for(int i=0;i