Index: src/java/org/apache/lucene/index/IndexDeletionPolicy.java =================================================================== --- src/java/org/apache/lucene/index/IndexDeletionPolicy.java (revision 518544) +++ src/java/org/apache/lucene/index/IndexDeletionPolicy.java (working copy) @@ -21,10 +21,13 @@ import java.io.IOException; /** - *

Expert: implement this interface, and pass it to one + *

Expert: policy for deletion of stale {@link IndexCommitPoint index commits}. + * + *

Implement this interface, and pass it to one * of the {@link IndexWriter} or {@link IndexReader} - * constructors, to customize when "point in time" commits - * are deleted from an index. The default deletion policy + * constructors, to customize when older + * {@link IndexCommitPoint point-in-time commits} + * are deleted from the index directory. The default deletion policy * is {@link KeepOnlyLastCommitDeletionPolicy}, which always * removes old commits as soon as a new commit is done (this * matches the behavior before 2.2).

@@ -52,31 +55,36 @@ * instantiated to give the policy a chance to remove old * commit points.

* - *

The writer locates all commits present in the index - * and calls this method. The policy may choose to delete - * commit points. To delete a commit point, call the - * {@link IndexCommitPoint#delete} method.

+ *

The writer locates all index commits present in the + * index directory and calls this method. The policy may + * choose to delete some of the commit points, doing so by + * calling method {@link IndexCommitPoint#delete delete()} + * of {@link IndexCommitPoint}.

* - * @param commits List of {@link IndexCommitPoint}, + * @param commits List of current + * {@link IndexCommitPoint point-in-time commits}, * sorted by age (the 0th one is the oldest commit). */ public void onInit(List commits) throws IOException; /** - *

This is called each time the writer commits. This - * gives the policy a chance to remove old commit points + *

This is called each time the writer completed a commit. + * This gives the policy a chance to remove old commit points * with each commit.

* + *

The policy may now choose to delete old commit points + * by calling method {@link IndexCommitPoint#delete delete()} + * of {@link IndexCommitPoint}.

+ * *

If writer has autoCommit = true then * this method will in general be called many times during * one instance of {@link IndexWriter}. If * autoCommit = false then this method is * only called once when {@link IndexWriter#close} is * called, or not at all if the {@link IndexWriter#abort} - * is called. The policy may now choose to delete old - * commit points by calling {@link IndexCommitPoint#delete}. + * is called. * - * @param commits List of {@link IndexCommitPoint}>, + * @param commits List of {@link IndexCommitPoint}, * sorted by age (the 0th one is the oldest commit). */ public void onCommit(List commits) throws IOException; Index: src/java/org/apache/lucene/index/IndexCommitPoint.java =================================================================== --- src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 518544) +++ src/java/org/apache/lucene/index/IndexCommitPoint.java (working copy) @@ -18,24 +18,37 @@ */ /** - * Represents a single commit into an index as seen by the - * {@link IndexDeletionPolicy}. + *

Expert: represents a single commit into an index as seen by the + * {@link IndexDeletionPolicy}. + *

+ * Changes to the content of an index are made visible only + * after the writer who made that change had written to the + * directory a new segments file (Segments_N). This point in + * time, when the action of writing of a new segments file to the + * directory is completed, is therefore an index-commit-point. + *

+ * Each index-commit-point has a unique segments file associated + * with it. The segments file associated with a later + * index-commit-point would have a larger N. */ public interface IndexCommitPoint { /** - * Get the segments file (ie, segments_N) of - * this commit point. + * Get the segments file (segments_N) associated + * with this commit point. */ public String getSegmentsFileName(); /** - * Notify the writer that this commit point should be - * deleted. This should only be called by the {@link - * IndexDeletionPolicy} during its {@link - * IndexDeletionPolicy#onInit} or {@link - * IndexDeletionPolicy#onCommit} method. + * Delete this commit point. + *

+ * Upon calling this, the writer is notified that this commit + * point should be deleted. + *

+ * Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect + * and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or + * {@link IndexDeletionPolicy#onCommit onCommit()} methods. */ public void delete(); } Index: src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNames.java (revision 518544) +++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy) @@ -44,8 +44,8 @@ /** Extension of deletes */ static final String DELETES_EXTENSION = "del"; - /** Extension of single norms */ - static final String SINGLE_NORMS_EXTENSION = "f"; + /** Extension of plain norms */ + static final String PLAIN_NORMS_EXTENSION = "f"; /** Extension of separate norms */ static final String SEPARATE_NORMS_EXTENSION = "s"; @@ -91,9 +91,9 @@ * @param gen -- generation */ static final String fileNameFromGeneration(String base, String extension, long gen) { - if (gen == -1) { + if (gen == SegmentInfo.NO) { return null; - } else if (gen == 0) { + } else if (gen == SegmentInfo.WITHOUT_GEN) { return base + extension; } else { return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension; Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 518544) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -126,6 +126,30 @@ normally relies on.

*/ +/* + * Clarification: Check Points (and commits) + * Being able to set autoCommit=false allows IndexWriter to flush and + * write new index files to the directory without writing a new Segments + * file which references these new files. It also means that the state of + * the in memory SegmentInfos object is different than the most recent + * onDir Segments file. + * + * Each time the SegmentInfos is changed, and matches the (possibly + * modified) directory files, we have a new "check point". + * If the modified/new SegmentInfos is written to disk - as a new + * (generation of) Segments file - this check point is also an + * IndexCommitPoint. + * + * With autoCommit=true, every checkPoint is also a CommitPoint. + * With autoCommit=false, some checkPoints may not be commits. + * + * A new checkpoint always replaces the previous checkpoint and + * becomes the new "front" of the index. This allows the IndexFileDeleter + * to delete files that are referenced only by stale checkpoints. + * (files that were created since the last commit, but are no longer + * referenced by the "front" of the index). For this, IndexFileDeleter + * keeps track of the last non commit checkpoint. + */ public class IndexWriter { /** @@ -1655,7 +1679,9 @@ /** * Flush all in-memory buffered updates (adds and deletes) - * to the Directory. + * to the Directory. + *

Note: if autocommit=false, flushed data would still + * not be isible to readers. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 518544) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -33,20 +33,30 @@ /* * This class keeps track of each SegmentInfos instance that - * is still "live", either because it corresponds to a - * segments_N in the Directory (a real commit) or because - * it's the in-memory SegmentInfos that a writer is actively - * updating but has not yet committed (currently this only - * applies when autoCommit=false in IndexWriter). This - * class uses simple reference counting to map the live - * SegmentInfos instances to individual files in the - * Directory. + * is still "live", either because it corresponds to a + * segments_N file in the Directory (a "commit", i.e. a + * committed SegmentInfos) or because it's the in-memory SegmentInfos + * that a writer is actively updating but has not yet committed + * (currently this only applies when autoCommit=false in IndexWriter). + * This class uses simple reference counting to map the live + * SegmentInfos instances to individual files in the Directory. + * + * Clarification 1: the same directory file may be + * referenced by more than one IndexCommitPoints, i.e. more + * than one SegmentInfos. Therefore we count how many commits + * reference each file. When all the commits referencing + * a certain file have been deleted, the refcount for that + * file would zero, and the file would be deleted. It is * * A separate deletion policy interface * (IndexDeletionPolicy) is consulted on creation (onInit) * and once per commit (onCommit), to decide when a commit * should be removed. * + * Clarification 2: It is the business of the IndexDeletionPolicy + * to delete commit points. The actual file deletions derived + * from deleting commit points is the business of the IndexFileDeleter. + * * The current default deletion policy is {@link * KeepOnlyLastCommitDeletionPolicy}, which removes all * prior commits when a new commit has completed. This @@ -64,8 +74,9 @@ * so we will retry them again later: */ private List deletable; - /* Reference count for all files in the index. Maps - * String to RefCount (class below) instances: */ + /* Reference count for all files in the index. + * Counts how many existing commits reference a file. + * Maps String to RefCount (class below) instances: */ private Map refCounts = new HashMap(); /* Holds all commits (segments_N) currently in the index. @@ -79,8 +90,10 @@ * non-commit checkpoint: */ private List lastFiles = new ArrayList(); + /* Commits that the IndexDeletionPolicy have decided to delete: */ + private List commitsToDelete = new ArrayList(); + private PrintStream infoStream; - private List toDelete = new ArrayList(); private Directory directory; private IndexDeletionPolicy policy; @@ -188,19 +201,19 @@ } /** - * Remove the CommitPoints in the toDelete List by + * Remove the CommitPoints in the commitsToDelete List by * DecRef'ing all files from each SegmentInfos. */ private void deleteCommits() throws IOException { - int size = toDelete.size(); + int size = commitsToDelete.size(); if (size > 0) { // First decref all files that had been referred to by // the now-deleted commits: for(int i=0;i=YES says this field + // has separate norms. - private byte isCompoundFile; // -1 if it is not; 1 if it is; 0 if it's + private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's // pre-2.1 (ie, must check file system to see // if .cfs and .nrm exist) @@ -59,15 +69,15 @@ this.name = name; this.docCount = docCount; this.dir = dir; - delGen = -1; - isCompoundFile = 0; + delGen = NO; + isCompoundFile = CHECK_DIR; preLockless = true; hasSingleNormFile = false; } public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { this(name, docCount, dir); - this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1); + this.isCompoundFile = (byte) (isCompoundFile ? YES : NO); this.hasSingleNormFile = hasSingleNormFile; preLockless = false; } @@ -112,7 +122,7 @@ hasSingleNormFile = false; } int numNormGen = input.readInt(); - if (numNormGen == -1) { + if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; @@ -121,11 +131,11 @@ } } isCompoundFile = input.readByte(); - preLockless = isCompoundFile == 0; + preLockless = (isCompoundFile == CHECK_DIR); } else { - delGen = 0; + delGen = CHECK_DIR; normGen = null; - isCompoundFile = 0; + isCompoundFile = CHECK_DIR; preLockless = true; hasSingleNormFile = false; } @@ -138,11 +148,15 @@ // norms set against it yet: normGen = new long[numFields]; - if (!preLockless) { + if (preLockless) { + // do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know + // we have to check filesystem for norm files, because this is prelockless. + + } else { // This is a FORMAT_LOCKLESS segment, which means // there are no separate norms: for(int i=0;i 0: this means this segment was written by + // delGen >= YES: this means this segment was written by // the LOCKLESS code and for certain has // deletions // - if (delGen == -1) { + if (delGen == NO) { return false; - } else if (delGen > 0) { + } else if (delGen >= YES) { return true; } else { return dir.fileExists(getDelFileName()); @@ -175,8 +189,8 @@ void advanceDelGen() { // delGen 0 is reserved for pre-LOCKLESS format - if (delGen == -1) { - delGen = 1; + if (delGen == NO) { + delGen = YES; } else { delGen++; } @@ -184,7 +198,7 @@ } void clearDelGen() { - delGen = -1; + delGen = NO; files = null; } @@ -201,13 +215,13 @@ } String getDelFileName() { - if (delGen == -1) { + if (delGen == NO) { // In this case we know there is no deletion filename // against this segment return null; } else { - // If delGen is 0, it's the pre-lockless-commit file format - return IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); + // If delGen is CHECK_DIR, it's the pre-lockless-commit file format + return IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); } } @@ -218,11 +232,11 @@ */ boolean hasSeparateNorms(int fieldNumber) throws IOException { - if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == 0)) { + if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) { // Must fallback to directory file exists check: String fileName = name + ".s" + fieldNumber; return dir.fileExists(fileName); - } else if (normGen == null || normGen[fieldNumber] == -1) { + } else if (normGen == null || normGen[fieldNumber] == NO) { return false; } else { return true; @@ -258,17 +272,17 @@ } } else { // This means this segment was saved with LOCKLESS - // code so we first check whether any normGen's are > - // 0 (meaning they definitely have separate norms): + // code so we first check whether any normGen's are >= 1 + // (meaning they definitely have separate norms): for(int i=0;i 0) { + if (normGen[i] >= YES) { return true; } } // Next we look for any == 0. These cases were // pre-LOCKLESS and must be checked in directory: for(int i=0;i 0 || dir.fileExists(delFileName))) { + if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) { files.add(delFileName); } - // Careful logic for norms files: + // Careful logic for norms files if (normGen != null) { for(int i=0;i 0) { + if (gen >= YES) { // Definitely a separate norm file, with generation: files.add(IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); - } else if (-1 == gen) { - // No separate norms but maybe non-separate norms + } else if (NO == gen) { + // No separate norms but maybe plain norms // in the non compound file case: if (!hasSingleNormFile && !useCompoundFile) { - String fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i; + String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; if (dir.fileExists(fileName)) { files.add(fileName); } } - } else if (0 == gen) { + } else if (CHECK_DIR == gen) { // Pre-2.1: we have to check file existence String fileName = null; if (useCompoundFile) { fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i; } else if (!hasSingleNormFile) { - fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i; + fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; } if (fileName != null && dir.fileExists(fileName)) { files.add(fileName); @@ -445,7 +459,7 @@ if (useCompoundFile) prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION; else - prefix = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION; + prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION; int prefixLength = prefix.length(); String[] allFiles = dir.list(); if (allFiles == null) @@ -460,3 +474,4 @@ return files; } } +