Index: common-build.xml
===================================================================
--- common-build.xml (revision 777664)
+++ common-build.xml (working copy)
@@ -395,6 +395,7 @@
FilterIndexReader contains another IndexReader, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -213,7 +214,7 @@
/** @deprecated */
protected void doCommit() throws IOException { doCommit(null); }
- protected void doCommit(String commitUserData) throws IOException { in.commit(commitUserData); }
+ protected void doCommit(Map commitUserData) throws IOException { in.commit(commitUserData); }
protected void doClose() throws IOException { in.close(); }
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 777664)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -27,6 +27,7 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Map;
/** IndexReader is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
@@ -525,9 +526,9 @@
/**
* Reads commitUserData, previously passed to {@link
- * IndexWriter#commit(String)}, from current index
+ * IndexWriter#commit(Map)}, from current index
* segments file. This will return null if {@link
- * IndexWriter#commit(String)} has never been called for
+ * IndexWriter#commit(Map)} has never been called for
* this index.
*
* @param directory where the index resides.
@@ -537,7 +538,7 @@
*
* @see #getCommitUserData()
*/
- public static String getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
+ public static Map getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
return SegmentInfos.readCurrentUserData(directory);
}
@@ -552,12 +553,12 @@
/**
* Retrieve the String userData optionally passed to
* IndexWriter#commit. This will return null if {@link
- * IndexWriter#commit(String)} has never been called for
+ * IndexWriter#commit(Map)} has never been called for
* this index.
*
* @see #getCommitUserData(Directory)
*/
- public String getCommitUserData() {
+ public Map getCommitUserData() {
throw new UnsupportedOperationException("This reader does not support this method.");
}
@@ -1017,12 +1018,13 @@
}
/**
- * @param commitUserData Opaque String that's recorded
- * into the segments file in the index, and retrievable
- * by {@link IndexReader#getCommitUserData}.
+ * @param commitUserData Opaque Map (String -> String)
+ * that's recorded into the segments file in the index,
+ * and retrievable by {@link
+ * IndexReader#getCommitUserData}.
* @throws IOException
*/
- public final synchronized void flush(String commitUserData) throws IOException {
+ public final synchronized void flush(Map commitUserData) throws IOException {
ensureOpen();
commit(commitUserData);
}
@@ -1049,7 +1051,7 @@
* (transactional semantics).
* @throws IOException if there is a low-level IO error
*/
- protected final synchronized void commit(String commitUserData) throws IOException {
+ protected final synchronized void commit(Map commitUserData) throws IOException {
if (hasChanges) {
doCommit(commitUserData);
}
@@ -1057,13 +1059,13 @@
}
/** Implements commit.
- * @deprecated Please implement {@link #doCommit(String)
+ * @deprecated Please implement {@link #doCommit(Map)
* instead}. */
protected abstract void doCommit() throws IOException;
/** Implements commit. NOTE: subclasses should override
* this. In 3.0 this will become an abstract method. */
- void doCommit(String commitUserData) throws IOException {
+ void doCommit(Map commitUserData) throws IOException {
// Default impl discards commitUserData; all Lucene
// subclasses override this (do not discard it).
doCommit();
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 777664)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -3718,6 +3718,7 @@
segmentInfos.clear(); // pop old infos & add new
info = new SegmentInfo(mergedName, docCount, directory, false, true,
-1, null, false, merger.hasProx());
+ setDiagnostics(info, "addIndexes(IndexReader[])");
segmentInfos.add(info);
}
@@ -3831,16 +3832,16 @@
* you should immediately close the writer. See above for details.
Expert: prepare for commit, specifying - * commitUserData String. This does the first phase of - * 2-phase commit. You can only call this when - * autoCommit is false. This method does all steps + * commitUserData Map (String -> String). This does the + * first phase of 2-phase commit. You can only call this + * when autoCommit is false. This method does all steps * necessary to commit changes since this writer was * opened: flushes pending added and deleted docs, syncs * the index files, writes most of next segments_N file. @@ -3849,7 +3850,7 @@ * #rollback()} to revert the commit and undo all changes * done since the writer was opened.
* - * You can also just call {@link #commit(String)} directly + * You can also just call {@link #commit(Map)} directly * without prepareCommit first in which case that method * will internally call prepareCommit. * @@ -3857,11 +3858,12 @@ * you should immediately close the writer. See above for details. * - * @param commitUserData Opaque String that's recorded - * into the segments file in the index, and retrievable - * by {@link IndexReader#getCommitUserData}. Note that - * when IndexWriter commits itself, for example if open - * with autoCommit=true, or, during {@link #close}, the + * @param commitUserData Opaque Map (String->String) + * that's recorded into the segments file in the index, + * and retrievable by {@link + * IndexReader#getCommitUserData}. Note that when + * IndexWriter commits itself, for example if open with + * autoCommit=true, or, during {@link #close}, the * commitUserData is unchanged (just carried over from * the prior commit). If this is null then the previous * commitUserData is kept. Also, the commitUserData will @@ -3869,11 +3871,11 @@ * index to commit. Therefore it's best to use this * feature only when autoCommit is false. */ - public final void prepareCommit(String commitUserData) throws CorruptIndexException, IOException { + public final void prepareCommit(Map commitUserData) throws CorruptIndexException, IOException { prepareCommit(commitUserData, false); } - private final void prepareCommit(String commitUserData, boolean internal) throws CorruptIndexException, IOException { + private final void prepareCommit(Map commitUserData, boolean internal) throws CorruptIndexException, IOException { if (hitOOM) { throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit"); @@ -3926,22 +3928,22 @@ * href="#OOME">above for details. 
* * @see #prepareCommit - * @see #commit(String) + * @see #commit(Map) */ public final void commit() throws CorruptIndexException, IOException { commit(null); } /** Commits all changes to the index, specifying a - * commitUserData String. This just calls {@link - * #prepareCommit(String)} (if you didn't already call - * it) and then {@link #finishCommit}. + * commitUserData Map (String -> String). This just + * calls {@link #prepareCommit(Map)} (if you didn't + * already call it) and then {@link #finishCommit}. * *NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See above for details.
*/ - public final void commit(String commitUserData) throws CorruptIndexException, IOException { + public final void commit(Map commitUserData) throws CorruptIndexException, IOException { ensureOpen(); @@ -4136,6 +4138,7 @@ docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.hasProx()); + setDiagnostics(newSegment, "flush"); } docWriter.pushDeletes(); @@ -4646,6 +4649,13 @@ docStoreIsCompoundFile, false); + + Map details = new HashMap(); + details.put("optimize", merge.optimize+""); + details.put("mergeFactor", end+""); + details.put("mergeDocStores", mergeDocStores+""); + setDiagnostics(merge.info, "merge", details); + // Also enroll the merged segment into mergingSegments; // this prevents it from getting selected for a merge // after our merge is done but while we are building the @@ -4653,6 +4663,25 @@ mergingSegments.add(merge.info); } + private void setDiagnostics(SegmentInfo info, String source) { + setDiagnostics(info, source, null); + } + + private void setDiagnostics(SegmentInfo info, String source, Map details) { + Map diagnostics = new HashMap(); + diagnostics.put("source", source); + diagnostics.put("lucene.version", Constants.LUCENE_VERSION); + diagnostics.put("os", Constants.OS_NAME+""); + diagnostics.put("os.arch", Constants.OS_ARCH+""); + diagnostics.put("os.version", Constants.OS_VERSION+""); + diagnostics.put("java.version", Constants.JAVA_VERSION+""); + diagnostics.put("java.vendor", Constants.JAVA_VENDOR+""); + if (details != null) { + diagnostics.putAll(details); + } + info.setDiagnostics(diagnostics); + } + /** This is called after merging a segment and before * building its CFS. Return true if the files should be * sync'd. If you return false, then the source segment @@ -5142,7 +5171,7 @@ * if it wasn't already. If that succeeds, then we * prepare a new segments_N file but do not fully commit * it. 
*/ - private void startCommit(long sizeInBytes, String commitUserData) throws IOException { + private void startCommit(long sizeInBytes, Map commitUserData) throws IOException { assert testPoint("startStartCommit"); Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 777664) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -585,7 +585,7 @@ long version; long generation; final boolean isOptimized; - final String userData; + final Map userData; public CommitPoint(Collection commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException { this.directory = directory; @@ -625,7 +625,7 @@ return generation; } - public String getUserData() { + public Map getUserData() { return userData; } Index: src/java/org/apache/lucene/store/IndexInput.java =================================================================== --- src/java/org/apache/lucene/store/IndexInput.java (revision 777664) +++ src/java/org/apache/lucene/store/IndexInput.java (working copy) @@ -18,6 +18,8 @@ */ import java.io.IOException; +import java.util.Map; +import java.util.HashMap; /** Abstract base class for input from a file in a {@link Directory}. A * random-access input stream. Used for all Lucene index input operations. @@ -226,4 +228,16 @@ return clone; } + // returns MapFormat, NameCounter, SegCount, SegSize, NumField, - DocStoreOffset, DeletionCount --> Int32 + DocStoreOffset, DeletionCount, DiagnosticsCount --> Int32
@@ -847,7 +847,7 @@
- SegName, DocStoreSegment, CommitUserData --> String + SegName, DocStoreSegment, CommitUserData, DiagnosticKey, DiagnosticValue --> String
@@ -856,7 +856,7 @@
- Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -8 (SegmentInfos.FORMAT_USER_DATA) as of Lucene 2.9. + Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -9 (SegmentInfos.FORMAT_DIAGNOSTICS) as of Lucene 2.9.
@@ -963,6 +963,16 @@ a string previously passed to IndexWriter's commit or prepareCommit method.
++ If DiagnosticsCount is non-zero, then that number + of String -> String (key/value map) diagnostics are + stored. Currently these diagnostics are privately + written by IndexWriter, as a debugging aid, for + each segment it creates. They include things like + the current Lucene version, OS, Java version, why + the segment was created (merge, flush, + addIndexes), etc. +
Index: docs/fileformats.pdf =================================================================== --- docs/fileformats.pdf (revision 777664) +++ docs/fileformats.pdf (working copy) @@ -8,7 +8,7 @@ << /Length 1117 /Filter [ /ASCII85Decode /FlateDecode ] >> stream -Gb!$G9lo#B&;KZO$6@53W]k9ICdOP`P=a5[dnAEt!C8gORi4Z:^TSn%I4u(M/f6Qu5V)`b?+hcW?/#04U4=qR5W\?WoeGhWYioMGj;W_>r>%*jBf#hS$N07??;IG:iWe2$GTd%P5A[5AGK.,clStMnIs*foQHm-?;6D7rjp(_fkuW9P8UVE3V0PI;7%6iam]H;hfIlOSITofT^+bJa!4,V)0b+f8okNaP[D!`crot;@qgDZ/Q,oMcirCCc`IcFI[>!*[q,(C<5FO/G&hoJ'M>&lTf1DK/WXVZg0k@PWoq:JBaalA %#1h63YES#=kMbIF>epV:e0Ro2P.a9*(l;WdG=NW*H[n[kfTdHTX8k=5[)s9$m<
Format, NameCounter, SegCount, SegSize, NumField,
- DocStoreOffset, DeletionCount --> Int32
+ DocStoreOffset, DeletionCount, DiagnosticsCount --> Int32
Version, DelGen, NormGen, Checksum --> Int64
- SegName, DocStoreSegment, CommitUserData --> String
+ SegName, DocStoreSegment, CommitUserData, DiagnosticKey, DiagnosticValue --> String
IsCompoundFile, HasSingleNormFile,
DocStoreIsCompoundFile, HasProx, HasUserData --> Int8
- Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -8 (SegmentInfos.FORMAT_USER_DATA) as of Lucene 2.9.
+ Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -9 (SegmentInfos.FORMAT_DIAGNOSTICS) as of Lucene 2.9.
Version counts how often the index has been
@@ -1453,7 +1453,17 @@
a string previously passed to IndexWriter's commit
or prepareCommit method.
+ If DiagnosticsCount is non-zero, then that number
+ of String -> String (key/value map) diagnostics are
+ stored. Currently these diagnostics are privately
+ written by IndexWriter, as a debugging aid, for
+ each segment it creates. They include things like
+ the current Lucene version, OS, Java version, why
+ the segment was created (merge, flush,
+ addIndexes), etc.
+
The write lock, which is stored in the index
@@ -1471,7 +1481,7 @@
Note that prior to version 2.1, Lucene also used a
commit lock. This was removed in 2.1.
Prior to Lucene 2.1 there was a file "deletable"
@@ -1480,7 +1490,7 @@
the files that are deletable, instead, so no file
is written.
Starting with Lucene 1.4 the compound file format became default. This
is simply a container for all files described in the next section
@@ -1507,14 +1517,14 @@
-
+
The remaining files are all per-segment, and are
thus defined by suffix.
The term dictionary is represented as two files:
@@ -1926,7 +1936,7 @@
-
+
The .frq file contains the lists of documents
@@ -2054,7 +2064,7 @@
entry in level-1. In the example has entry 15 on level 1 a pointer to entry 15 on level 0 and entry 31 on level 1 a pointer
to entry 31 on level 0.
The .prx file contains the lists of positions that
@@ -2124,7 +2134,7 @@
Payload. If PayloadLength is not stored, then this Payload has the same
length as the Payload at the previous position.
@@ -2228,7 +2238,7 @@
2.1 and above:
Separate norm files are created (when adequate) for both compound and non compound segments.
Term Vector support is an optional on a field by
@@ -2361,7 +2371,7 @@
-
+
The .del file is
optional, and only exists when a segment contains deletions.
@@ -2433,7 +2443,7 @@
=X/5U/&O;(.+>L<07VEfq0pd][>*R5o6_Eke>WEsk%rrQ543]/s?(6+/Qg\9c?(HbjIfW@0_<042f6\h/srm`BKD.askKI`'_Uel;Gs/]eO`#C"/Zj*q#JT(;FIg&nsF
endstream
endobj
100 0 obj
@@ -604,10 +604,10 @@
>>
endobj
101 0 obj
-<< /Length 1528 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1395 /Filter [ /ASCII85Decode /FlateDecode ]
>>
stream
-Gau0D95iiK&AJ$C#eB:+L_Z%u(Q0h&mCL@_:!uH!V'uc)U20k9kS5C*rqE612MVq0-ANetCe,"J^3L67)bn%:PFiM!XSBQf-_Y*TL?Ej[\)TAr:L+4ss%6`O-@\d.5>6MhEt-W87Hqa*:KdT#)9MU.5t_D,Kkt1i$dVYLaFai@_X69).69D`-9JaBTX&Ld(;h_\VV8HX,W5V)C;pO;q[UO0E$3O:6Bijd`/c^"ss\'+LWD:8)ugP:@BW;3KpkDja1qUR+o?2o4SSUQ:#.RM^m!ZFp//E5S>a%"5iEM@/,K4#Mu=3?_P`W1LWC7Nf[J-te,=fF:eG!nBg-2C]-@[RfTKco;A[nZ5-H%#a_gHL6IED+cF5hI1b286A4OjaEi^U>25^!maRBKhoLP&]&[GU84OFuYeVlc1hnU]OmAar+9QWCiQkU_=l5AjWR'='L--%Q7QKZKGL>GU@Ds8os_$r5Z;1TDA6EQb19C%(2@0n\/=m!r*e*:.)'KlWGF@NgBKSYAii)jNt].)IBn.8mS85'SsOd=^Td$)M3dXWB@%s0Dc)74*^+mm+>Wo"Y`%15ssnul>SnO,gG368[5;H5it_1o'IARIor<$q2]j>FrfPi:83K^93#Smp1`8>.#Yc15E(9IF$,BhXY1VO2aWa\*tFEP61;R0gob1Hs2!IuL&E*,7G'8.3]LT^@dJ+Y#V#(q&AMnr?)lrc3H*ig&qg+aUcrn7/P+,s>]Co9*MAS,^8rf%7SjNb\naOq_MgK8f%;72crX,nCtgY1\dFs
#HQP4rr7FCT15k22rHKI5#1'o-=*=q5$'k8Isc42%:ctE";<9;f@U3)C1TZ_.0=So>kdF)/#\A2=[+c7[%gR(?:FPPsc>5E Lock File
Deletable File
Compound Files
Per-Segment Files
Fields
Term Dictionary
Frequencies
Positions
Normalization Factors
Term Vectors
Deleted Documents
Limitations