Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 619987)
+++ CHANGES.txt (working copy)
@@ -18,6 +18,14 @@
compatibility will be removed in 3.0 (hardwiring the value to
true). (Mike McCandless)
+ 2. LUCENE-1044: IndexWriter with autoCommit=true now commits (such
+ that a reader can see the changes) far less often than it used to.
+ Previously, every flush was also a commit. You can always force a
+ commit by calling IndexWriter.commit(). Furthermore, in 3.0,
+ autoCommit will be hardwired to false (IndexWriter constructors
+ that take an autoCommit argument have been deprecated).
+ (Mike McCandless)
+
API Changes
1. LUCENE-1084: Changed all IndexWriter constructors to take an
@@ -36,6 +44,11 @@
the Lucene code base will need to be adapted. See also the javadocs
of the Filter class. (Paul Elschot, Michael Busch)
+ 4. LUCENE-1044: Added IndexWriter.commit() which flushes any buffered
+ adds/deletes and then commits a new segments file so readers will
+ see the changes. Deprecated IndexWriter.flush() in favor of
+ IndexWriter.commit(). (Mike McCandless)
+
Bug fixes
1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single
@@ -75,6 +88,12 @@
5. LUCENE-494: Added QueryAutoStopWordAnalyzer to allow for the automatic removal, from a query, of frequently occurring terms.
This Analyzer is not intended for use during indexing. (Mark Harwood via Grant Ingersoll)
+ 6. LUCENE-1044: Change Lucene to properly "sync" files after
+ committing, to ensure that, on a machine or OS crash or power cut,
+ even with cached writes, the index remains consistent. Also added
+ explicit commit() method to IndexWriter to force a commit without
+ having to close. (Mike McCandless)
+
Optimizations
1. LUCENE-705: When building a compound file, use
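
To make the new commit semantics above concrete, here is a minimal sketch (not part of the patch) of forcing a commit so a freshly opened reader sees buffered changes; the index path, analyzer, and field are illustrative, and the constructor shown is the 2.4-era String-path variant:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;

    public class CommitExample {
      public static void main(String[] args) throws Exception {
        IndexWriter writer = new IndexWriter("/tmp/index", new StandardAnalyzer(),
                                             true, IndexWriter.MaxFieldLength.LIMITED);
        Document doc = new Document();
        doc.add(new Field("body", "hello world", Field.Store.YES, Field.Index.TOKENIZED));
        writer.addDocument(doc);   // buffered; not yet visible to readers

        // commit() flushes buffered adds/deletes, writes a new segments_N
        // file, and syncs the referenced files:
        writer.commit();

        IndexReader reader = IndexReader.open("/tmp/index");
        System.out.println("docs visible after commit: " + reader.numDocs());
        reader.close();
        writer.close();
      }
    }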
Index: src/test/org/apache/lucene/store/MockRAMInputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMInputStream.java (revision 619987)
+++ src/test/org/apache/lucene/store/MockRAMInputStream.java (working copy)
@@ -45,11 +45,14 @@
if (!isClone) {
synchronized(dir.openFiles) {
Integer v = (Integer) dir.openFiles.get(name);
- if (v.intValue() == 1) {
- dir.openFiles.remove(name);
- } else {
- v = new Integer(v.intValue()-1);
- dir.openFiles.put(name, v);
+ // Could be null when MockRAMDirectory.crash() was called
+ if (v != null) {
+ if (v.intValue() == 1) {
+ dir.openFiles.remove(name);
+ } else {
+ v = new Integer(v.intValue()-1);
+ dir.openFiles.put(name, v);
+ }
}
}
}
Index: src/test/org/apache/lucene/store/MockRAMOutputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMOutputStream.java (revision 619987)
+++ src/test/org/apache/lucene/store/MockRAMOutputStream.java (working copy)
@@ -63,6 +63,11 @@
long freeSpace = dir.maxSize - dir.sizeInBytes();
long realUsage = 0;
+ // If MockRAMDir crashed since we were opened, then
+ // don't write anything:
+ if (dir.crashed)
+ throw new IOException("MockRAMDirectory was crashed");
+
// Enforce disk full:
if (dir.maxSize != 0 && freeSpace <= len) {
// Compute the real disk free. This will greatly slow
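
As a sketch of how these mock-store hooks fit together in a test (it uses the crash() method added to MockRAMDirectory in the next section; the field and expected count are illustrative):

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.MockRAMDirectory;

    public class CrashTestSketch {
      public static void main(String[] args) throws Exception {
        MockRAMDirectory dir = new MockRAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(),
                                             true, IndexWriter.MaxFieldLength.LIMITED);
        Document doc = new Document();
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.addDocument(doc);
        writer.commit();           // syncs all files referenced by segments_N

        writer.addDocument(doc);   // buffered, never committed
        dir.crash();               // unsynced files are deleted or corrupted;
                                   // further writes throw IOException

        // The index must still open cleanly at the last commit point:
        IndexReader reader = IndexReader.open(dir);
        System.out.println("docs after crash: " + reader.numDocs()); // expect 1
        reader.close();
      }
    }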
Index: src/test/org/apache/lucene/store/MockRAMDirectory.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMDirectory.java (revision 619987)
+++ src/test/org/apache/lucene/store/MockRAMDirectory.java (working copy)
@@ -24,7 +24,10 @@
import java.util.Random;
import java.util.Map;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
import java.util.ArrayList;
+import java.util.Arrays;
/**
* This is a subclass of RAMDirectory that adds methods
@@ -40,6 +43,10 @@
double randomIOExceptionRate;
Random randomState;
boolean noDeleteOpenFile = true;
+ boolean preventDoubleWrite = true;
+ private Set unSyncedFiles;
+ private Set createdFiles;
+ volatile boolean crashed;
// NOTE: we cannot initialize the Map here due to the
// order in which our constructor actually does this
@@ -47,31 +54,80 @@
// like super is called, then our members are initialized:
Map openFiles;
+ private void init() {
+ if (openFiles == null)
+ openFiles = new HashMap();
+ if (createdFiles == null)
+ createdFiles = new HashSet();
+ if (unSyncedFiles == null)
+ unSyncedFiles = new HashSet();
+ }
+
public MockRAMDirectory() {
super();
- if (openFiles == null) {
- openFiles = new HashMap();
- }
+ init();
}
public MockRAMDirectory(String dir) throws IOException {
super(dir);
- if (openFiles == null) {
- openFiles = new HashMap();
- }
+ init();
}
public MockRAMDirectory(Directory dir) throws IOException {
super(dir);
- if (openFiles == null) {
- openFiles = new HashMap();
- }
+ init();
}
public MockRAMDirectory(File dir) throws IOException {
super(dir);
- if (openFiles == null) {
+ init();
+ }
+
+ /** If set to true, we throw an IOException if the same
+ * file is opened by createOutput more than once. */
+ public void setPreventDoubleWrite(boolean value) {
+ preventDoubleWrite = value;
+ }
+
+ public synchronized void sync(String name) throws IOException {
+ maybeThrowDeterministicException();
+ if (crashed)
+ throw new IOException("cannot sync after crash");
+ if (unSyncedFiles.contains(name))
+ unSyncedFiles.remove(name);
+ }
+
+ /** Simulates a crash of OS or machine by overwriting
+ * unsynced files. */
+ public void crash() throws IOException {
+ synchronized(this) {
+ crashed = true;
openFiles = new HashMap();
}
+ Iterator it = unSyncedFiles.iterator();
+ unSyncedFiles = new HashSet();
+ int count = 0;
+ while(it.hasNext()) {
+ String name = (String) it.next();
+ RAMFile file = (RAMFile) fileMap.get(name);
+ if (count % 3 == 0) {
+ deleteFile(name, true);
+ } else if (count % 3 == 1) {
+ // Zero out file entirely
+ final int numBuffers = file.numBuffers();
+        for(int i=0;i<numBuffers;i++)
+          Arrays.fill(file.getBuffer(i), (byte) 0);
+      }
+      count++;
+    }
+  }
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java    (revision 619987)
+++ src/java/org/apache/lucene/index/IndexWriter.java    (working copy)
The optional autoCommit argument to the
- constructors
- controls visibility of the changes to {@link IndexReader} instances reading the same index.
- When this is false, changes are not
- visible until {@link #close()} is called.
- Note that changes will still be flushed to the
- {@link org.apache.lucene.store.Directory} as new files,
- but are not committed (no new segments_N file
- is written referencing the new files) until {@link #close} is
- called. If something goes terribly wrong (for example the
- JVM crashes) before {@link #close()}, then
- the index will reflect none of the changes made (it will
- remain in its starting state).
- You can also call {@link #abort()}, which closes the writer without committing any
- changes, and removes any index
+
[Deprecated: Note that in 3.0, IndexWriter will
+ no longer accept autoCommit=true (it will be hardwired to
+ false). You can always call {@link IndexWriter#commit()} yourself
+ when needed]. The optional autoCommit argument to the constructors
+ controls visibility of the changes to {@link IndexReader}
+ instances reading the same index. When this is
+ false, changes are not visible until {@link
+ #close()} is called. Note that changes will still be
+ flushed to the {@link org.apache.lucene.store.Directory}
+ as new files, but are not committed (no new
+ segments_N file is written referencing the
+ new files, nor are the files sync'd to stable storage)
+ until {@link #commit} or {@link #close} is called. If something
+ goes terribly wrong (for example the JVM crashes), then
+ the index will reflect none of the changes made since the
+ last commit, or the starting state if commit was not called.
+ You can also call {@link #abort}, which closes the writer
+ without committing any changes, and removes any index
files that had been flushed but are now unreferenced.
This mode is useful for preventing readers from refreshing
at a bad time (for example after you've done all your
- deletes but before you've done your adds).
- It can also be used to implement simple single-writer
- transactional semantics ("all or none").
When autoCommit is true then
- every flush is also a commit ({@link IndexReader}
- instances will see each flush as changes to the index).
- This is the default, to match the behavior before 2.2.
- When running in this mode, be careful not to refresh your
+ the writer will periodically commit on its own. This is
+ the default, to match the behavior before 2.2. However,
+ in 3.0, autoCommit will be hardwired to false. There is
+ no guarantee when exactly an auto commit will occur (it
+ used to be after every flush, but it is now after every
+ completed merge, as of 2.4). If you want to force a
+ commit, call {@link #commit}, or, close the writer. Once
+ a commit has finished, ({@link IndexReader} instances will
+ see the changes to the index as of that commit. When
+ running in this mode, be careful not to refresh your
readers while optimize or segment merges are taking place
as this can tie up substantial disk space.
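
A sketch of the "all or none" pattern this javadoc describes, using the non-autoCommit constructor added below together with commit() and abort() (the Term-based update is illustrative, not part of the patch):

    import java.io.IOException;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;

    public class AllOrNone {
      // Make a delete+add pair visible to readers atomically.
      public static void update(Directory dir, Term id, Document newDoc) throws IOException {
        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(),
                                             IndexWriter.MaxFieldLength.LIMITED);
        try {
          writer.deleteDocuments(id);   // buffered, not yet visible
          writer.addDocument(newDoc);   // buffered, not yet visible
          writer.commit();              // readers now see delete + add together
          writer.close();
        } catch (IOException e) {
          writer.abort();               // discard everything since the last commit
          throw e;
        }
      }
    }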
* Constructs an IndexWriter for the index in <code>path</code>.
* Text will be analyzed with <code>a</code>. If <code>create</code>
* is true, then a new, empty index will be created in
- * <code>path</code>, replacing the index already there, if any.
+ * <code>path</code>, replacing the index already there,
+ * if any. Note that autoCommit defaults to true, but
+ * starting in 3.0 it will be hardwired to false.
*
* @param path the path to the index directory
* @param a the analyzer to use
@@ -487,6 +526,8 @@
* Text will be analyzed with <code>a</code>. If <code>create</code>
* is true, then a new, empty index will be created in
* <code>path</code>, replacing the index already there, if any.
+ * Note that autoCommit defaults to true, but starting in 3.0
+ * it will be hardwired to false.
*
* @param path the path to the index directory
* @param a the analyzer to use
@@ -541,6 +582,8 @@
* Text will be analyzed with <code>a</code>. If <code>create</code>
* is true, then a new, empty index will be created in
* <code>d</code>, replacing the index already there, if any.
+ * Note that autoCommit defaults to true, but starting in 3.0
+ * it will be hardwired to false.
*
* @param d the index directory
* @param a the analyzer to use
@@ -595,6 +638,8 @@
* <code>path</code>, first creating it if it does not
* already exist. Text will be analyzed with
* <code>a</code>.
+ * Note that autoCommit defaults to true, but starting in 3.0
+ * it will be hardwired to false.
*
* @param path the path to the index directory
* @param a the analyzer to use
@@ -641,6 +686,8 @@
* <code>path</code>, first creating it if it does not
* already exist. Text will be analyzed with
* <code>a</code>.
+ * Note that autoCommit defaults to true, but starting in 3.0
+ * it will be hardwired to false.
*
* @param path the path to the index directory
* @param a the analyzer to use
@@ -687,6 +734,8 @@
* <code>d</code>, first creating it if it does not
* already exist. Text will be analyzed with
* <code>a</code>.
+ * Note that autoCommit defaults to true, but starting in 3.0
+ * it will be hardwired to false.
*
* @param d the index directory
* @param a the analyzer to use
@@ -746,6 +795,10 @@
* @throws IOException if the directory cannot be
* read/written to or if there is any other low-level
* IO error
+ * @deprecated This will be removed in 3.0, when
+ * autoCommit will be hardwired to false. Use {@link
+ * #IndexWriter(Directory,Analyzer,MaxFieldLength)}
+ * instead, and call {@link #commit} when needed.
*/
public IndexWriter(Directory d, boolean autoCommit, Analyzer a, MaxFieldLength mfl)
throws CorruptIndexException, LockObtainFailedException, IOException {
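
Migration from the deprecated form is mechanical; a sketch (the directory and analyzer are assumed given):

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    public class Migration {
      public static IndexWriter openWriter(Directory dir, Analyzer a) throws IOException {
        // Before: new IndexWriter(dir, true, a, IndexWriter.MaxFieldLength.LIMITED)
        // relied on autoCommit=true to publish changes periodically.
        // After: open without the autoCommit flag and call commit() explicitly
        // whenever changes should become visible to readers:
        return new IndexWriter(dir, a, IndexWriter.MaxFieldLength.LIMITED);
      }
    }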
@@ -798,6 +851,10 @@
* if it does not exist and create is
* false or if there is any other low-level
* IO error
+ * @deprecated This will be removed in 3.0, when
+ * autoCommit will be hardwired to false. Use {@link
+ * #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)}
+ * instead, and call {@link #commit} when needed.
*/
public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, MaxFieldLength mfl)
throws CorruptIndexException, LockObtainFailedException, IOException {
@@ -837,8 +894,33 @@
* IndexDeletionPolicy}, for the index in <code>d</code>,
* first creating it if it does not already exist. Text
* will be analyzed with <code>a</code>.
+ * Note that autoCommit defaults to true, but starting in 3.0
+ * it will be hardwired to false.
*
* @param d the index directory
+ * @param a the analyzer to use
+ * @param deletionPolicy see above
+ * @param mfl whether or not to limit field lengths
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (write.lock could not
+ * be obtained)
+ * @throws IOException if the directory cannot be
+ * read/written to or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ init(d, a, false, deletionPolicy, true, mfl.getLimit());
+ }
+
+ /**
+ * Expert: constructs an IndexWriter with a custom {@link
+ * IndexDeletionPolicy}, for the index in <code>d</code>,
+ * first creating it if it does not already exist. Text
+ * will be analyzed with <code>a</code>.
+ *
+ * @param d the index directory
* @param autoCommit see above
* @param a the analyzer to use
* @param deletionPolicy see above
@@ -851,6 +933,10 @@
* @throws IOException if the directory cannot be
* read/written to or if there is any other low-level
* IO error
+ * @deprecated This will be removed in 3.0, when
+ * autoCommit will be hardwired to false. Use {@link
+ * #IndexWriter(Directory,Analyzer,IndexDeletionPolicy,MaxFieldLength)}
+ * instead, and call {@link #commit} when needed.
*/
public IndexWriter(Directory d, boolean autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
throws CorruptIndexException, LockObtainFailedException, IOException {
@@ -889,8 +975,39 @@
* <code>create</code> is true, then a new, empty index
* will be created in <code>d</code>, replacing the index
* already there, if any.
+ * Note that autoCommit defaults to true, but starting in 3.0
+ * it will be hardwired to false.
*
* @param d the index directory
+ * @param a the analyzer to use
+ * @param create true to create the index or overwrite
+ * the existing one; false to append to the existing
+ * index
+ * @param deletionPolicy see above
+ * @param mfl whether or not to limit field lengths
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (write.lock could not
+ * be obtained)
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist and create is
+ * false or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ init(d, a, create, false, deletionPolicy, true, mfl.getLimit());
+ }
+
+ /**
+ * Expert: constructs an IndexWriter with a custom {@link
+ * IndexDeletionPolicy}, for the index in <code>d</code>.
+ * Text will be analyzed with <code>a</code>. If
+ * <code>create</code> is true, then a new, empty index
+ * will be created in <code>d</code>, replacing the index
+ * already there, if any.
+ *
+ * @param d the index directory
* @param autoCommit see above
* @param a the analyzer to use
* @param create true to create the index or overwrite
@@ -907,6 +1024,10 @@
* if it does not exist and create is
* false or if there is any other low-level
* IO error
+ * @deprecated This will be removed in 3.0, when
+ * autoCommit will be hardwired to false. Use {@link
+ * #IndexWriter(Directory,Analyzer,boolean,IndexDeletionPolicy,MaxFieldLength)}
+ * instead, and call {@link #commit} when needed.
*/
public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
throws CorruptIndexException, LockObtainFailedException, IOException {
@@ -984,15 +1105,22 @@
} catch (IOException e) {
// Likely this means it's a fresh directory
}
- segmentInfos.write(directory);
+ segmentInfos.commit(directory);
} else {
segmentInfos.read(directory);
+
+ // We assume that this segments_N was previously
+ // properly sync'd:
+      for(int i=0;i<segmentInfos.size();i++) {
-   * <p>Note: if <code>autoCommit=false</code>, flushed data would still
- * not be visible to readers, until {@link #close} is called.
+   * <p>Note: while this will force buffered docs to be
+   * pushed into the index, it will not make these docs
+   * visible to a reader. Use {@link #commit} instead.
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
+   * @deprecated please call {@link #commit} instead
    */
   public final void flush() throws CorruptIndexException, IOException {
     flush(true, false);
   }
 
   /**
+   * Commits all pending updates (added & deleted documents)
+   * to the index, and syncs all referenced index files,
+   * such that a reader will see the changes. Note that
+   * this does not wait for any running background merges to
+   * finish. This may be a costly operation, so you should
+   * test the cost in your application and do it only when
+   * really necessary.
+   *
+   * <p>Note that this operation calls Directory.sync on
+   * the index files. That call should not return until the
+   * file contents & metadata are on stable storage. For
+   * FSDirectory, this calls the OS's fsync. But, beware:
+   * some hardware devices may in fact cache writes even
+   * during fsync, and return before the bits are actually
+   * on stable storage, to give the appearance of faster
+   * performance. If you have such a device, and it does
+   * not have a battery backup (for example) then on power
+   * loss it may still lose data. Lucene cannot guarantee
+   * consistency on such devices.
+   */
+  public final void commit() throws CorruptIndexException, IOException {
+    commit(true);
+  }
+
+  private final void commit(boolean triggerMerges) throws CorruptIndexException, IOException {
+    flush(triggerMerges, true);
+    sync(true, 0);
+  }
+
+  /**
    * Flush all in-memory buffered updates (adds and deletes)
    * to the Directory.
    * @param triggerMerge if true, we may merge segments (if
@@ -2681,10 +2852,15 @@
     maybeMerge();
   }
 
+  // TODO: this method should not have to be entirely
+  // synchronized, ie, merges should be allowed to commit
+  // even while a flush is happening
   private synchronized final boolean doFlush(boolean flushDocStores) throws CorruptIndexException, IOException {
 
     // Make sure no threads are actively adding a document
 
+    flushCount++;
+
     // Returns true if docWriter is currently aborting, in
     // which case we skip flushing this segment
     if (docWriter.pauseAllThreads()) {
@@ -2717,10 +2893,18 @@
     // apply to more than just the last flushed segment
     boolean flushDeletes = docWriter.hasDeletes();
 
+    int docStoreOffset = docWriter.getDocStoreOffset();
+
+    // docStoreOffset should only be non-zero when
+    // autoCommit == false
+    assert !autoCommit || 0 == docStoreOffset;
+
+    boolean docStoreIsCompoundFile = false;
+
     if (infoStream != null) {
       message("  flush: segment=" + docWriter.getSegment() +
               " docStoreSegment=" + docWriter.getDocStoreSegment() +
-              " docStoreOffset=" + docWriter.getDocStoreOffset() +
+              " docStoreOffset=" + docStoreOffset +
               " flushDocs=" + flushDocs +
               " flushDeletes=" + flushDeletes +
               " flushDocStores=" + flushDocStores +
@@ -2729,14 +2913,6 @@
       message("  index before flush " + segString());
     }
 
-    int docStoreOffset = docWriter.getDocStoreOffset();
-
-    // docStoreOffset should only be non-zero when
-    // autoCommit == false
-    assert !autoCommit || 0 == docStoreOffset;
-
-    boolean docStoreIsCompoundFile = false;
-
     // Check if the doc stores must be separately flushed
     // because other segments, besides the one we are about
     // to flush, reference it
@@ -2754,60 +2930,63 @@
     // If we are flushing docs, segment must not be null:
     assert segment != null || !flushDocs;
 
-    if (flushDocs || flushDeletes) {
+    if (flushDocs) {
 
-      SegmentInfos rollback = null;
-
-      if (flushDeletes)
-        rollback = (SegmentInfos) segmentInfos.clone();
-
       boolean success = false;
+      final int flushedDocCount;
 
       try {
-        if (flushDocs) {
+        flushedDocCount = docWriter.flush(flushDocStores);
+        success = true;
+      } finally {
+        if (!success) {
+          if (infoStream != null)
+            message("hit exception flushing segment " + segment);
+          docWriter.abort(null);
+          deleter.refresh(segment);
+        }
+      }
+
+      if (0 == docStoreOffset && flushDocStores) {
+        // This means we are flushing private doc stores
+        // with this segment, so it will not be shared
+        // with other segments
+        assert docStoreSegment != null;
+        assert docStoreSegment.equals(segment);
+        docStoreOffset = -1;
+        docStoreIsCompoundFile = false;
+        docStoreSegment = null;
+      }
-          if (0 == docStoreOffset && flushDocStores) {
-            // This means we are flushing private doc stores
-            // with this segment, so it will not be shared
-            // with other segments
-            assert docStoreSegment != null;
-            assert docStoreSegment.equals(segment);
-            docStoreOffset = -1;
-            docStoreIsCompoundFile = false;
-            docStoreSegment = null;
-          }
+      // Create new SegmentInfo, but do not add to our
+      // segmentInfos until deletes are flushed
+      // successfully.
+      newSegment = new SegmentInfo(segment,
+                                   flushedDocCount,
+                                   directory, false, true,
+                                   docStoreOffset, docStoreSegment,
+                                   docStoreIsCompoundFile);
+    }
 
-          int flushedDocCount = docWriter.flush(flushDocStores);
-
-          newSegment = new SegmentInfo(segment,
-                                       flushedDocCount,
-                                       directory, false, true,
-                                       docStoreOffset, docStoreSegment,
-                                       docStoreIsCompoundFile);
-          segmentInfos.addElement(newSegment);
-        }
 
+    if (flushDeletes) {
+      try {
+        SegmentInfos rollback = (SegmentInfos) segmentInfos.clone();
 
-        if (flushDeletes) {
+        boolean success = false;
+        try {
           // we should be able to change this so we can
           // buffer deletes longer and then flush them to
-          // multiple flushed segments, when
-          // autoCommit=false
-          applyDeletes(flushDocs);
-          doAfterFlush();
-        }
-
-        checkpoint();
-        success = true;
-      } finally {
-        if (!success) {
-
-          if (infoStream != null)
-            message("hit exception flushing segment " + segment);
+          // multiple flushed segments only when a commit()
+          // finally happens
+          applyDeletes(newSegment);
+          success = true;
+        } finally {
+          if (!success) {
+            if (infoStream != null)
+              message("hit exception flushing deletes");
 
-          if (flushDeletes) {
-
-            // Carefully check if any partial .del files
-            // should be removed:
+            // Carefully remove any partially written .del
+            // files
            final int size = rollback.size();
            for(int i=0;i<size;i++) {
 
- 2.3 and above:
+ 2.3:
  Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
  NormGen^NumField,
  IsCompoundFile>^SegCount
+
+ 2.4 and above:
+ Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
+ NormGen^NumField,
+ IsCompoundFile>^SegCount, Checksum
+
Format, NameCounter, SegCount, SegSize, NumField, DocStoreOffset --> Int32
- Version, DelGen, NormGen --> Int64
+ Version, DelGen, NormGen, Checksum --> Int64
@@ -842,7 +848,7 @@
- Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, and -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3
+ Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3 and -5 (SegmentInfos.FORMAT_CHECKSUM) as of Lucene 2.4.
@@ -925,6 +931,13 @@
 shares a single set of these files with other segments.
+
+ Checksum contains the CRC32 checksum of all bytes
+ in the segments_N file up until the checksum.
+ This is used to verify integrity of the file on
+ opening the index.
+
Index: docs/fileformats.pdf
===================================================================
--- docs/fileformats.pdf    (revision 619987)
+++ docs/fileformats.pdf    (working copy)
@@ -5,10 +5,10 @@
 /Producer (FOP 0.20.5)
 >>
 endobj
 5 0 obj
-<< /Length 1115 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1113 /Filter [ /ASCII85Decode /FlateDecode ]
 >>
 stream
 [regenerated ASCII85-encoded PDF stream data omitted]
-2.3 and above:
+2.3:
Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen^NumField,
IsCompoundFile>^SegCount
+
+2.4 and above:
+ Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
+ NormGen^NumField,
+ IsCompoundFile>^SegCount, Checksum
+
Format, NameCounter, SegCount, SegSize, NumField, DocStoreOffset --> Int32
- Version, DelGen, NormGen --> Int64
+ Version, DelGen, NormGen, Checksum --> Int64
SegName, DocStoreSegment --> String
@@ -1335,7 +1342,7 @@
IsCompoundFile, HasSingleNormFile, DocStoreIsCompoundFile --> Int8
- Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, and -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3
+ Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3 and -5 (SegmentInfos.FORMAT_CHECKSUM) as of Lucene 2.4.
Version counts how often the index has been
@@ -1408,7 +1415,13 @@
shares a single set of these files with other
segments.
+ Checksum contains the CRC32 checksum of all bytes
+ in the segments_N file up until the checksum.
+ This is used to verify integrity of the file on
+ opening the index.
+
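
A sketch of the integrity check described here, assuming only the layout given above (all bytes of segments_N followed by a trailing big-endian Int64 CRC32); this is an illustration, not Lucene's internal reader:

    import java.io.DataInputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.util.zip.CRC32;

    public class VerifySegmentsChecksum {
      // Returns true if the trailing checksum matches the preceding bytes.
      public static boolean verify(File segmentsFile) throws IOException {
        long remaining = segmentsFile.length() - 8; // all bytes except checksum
        DataInputStream in = new DataInputStream(new FileInputStream(segmentsFile));
        try {
          CRC32 crc = new CRC32();
          for (long i = 0; i < remaining; i++)
            crc.update(in.read());                  // feed each byte to the CRC
          return in.readLong() == crc.getValue();   // stored vs computed
        } finally {
          in.close();
        }
      }
    }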
The write lock, which is stored in the index
@@ -1426,7 +1439,7 @@
Note that prior to version 2.1, Lucene also used a
commit lock. This was removed in 2.1.
Prior to Lucene 2.1 there was a file "deletable"
@@ -1435,7 +1448,7 @@
the files that are deletable, instead, so no file
is written.
Starting with Lucene 1.4, the compound file format became the default. This
is simply a container for all files described in the next section
@@ -1462,14 +1475,14 @@
-
+
The remaining files are all per-segment, and are
thus defined by suffix.
The term dictionary is represented as two files:
@@ -1874,7 +1887,7 @@
-
+
The .frq file contains the lists of documents
@@ -1992,7 +2005,7 @@
entry in level-1. In the example, entry 15 on level 1 has a pointer to entry 15 on level 0, and entry 31 on level 1 has a pointer
to entry 31 on level 0.
The .prx file contains the lists of positions that
@@ -2058,7 +2071,7 @@
Payload. If PayloadLength is not stored, then this Payload has the same
length as the Payload at the previous position.
@@ -2162,7 +2175,7 @@
2.1 and above:
Separate norm files are created (when adequate) for both compound and non-compound segments.
Term Vector support is optional, on a field by
@@ -2295,7 +2308,7 @@
-
+
The .del file is
optional, and only exists when a segment contains deletions.
@@ -2367,7 +2380,7 @@
There
endstream
endobj
86 0 obj
@@ -499,10 +499,10 @@
>>
endobj
87 0 obj
-<< /Length 1440 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1291 /Filter [ /ASCII85Decode /FlateDecode ]
>>
stream
 [regenerated ASCII85-encoded PDF stream data omitted]
Lock File
Deletable File
Compound Files
Per-Segment Files
Fields
Term Dictionary
Frequencies
Positions
Normalization Factors
Term Vectors
Deleted Documents
Limitations