Index: common-build.xml
===================================================================
--- common-build.xml (revision 777664)
+++ common-build.xml (working copy)
@@ -395,6 +395,7 @@
FilterIndexReader contains another IndexReader, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -213,7 +214,7 @@
/** @deprecated */
protected void doCommit() throws IOException { doCommit(null); }
- protected void doCommit(String commitUserData) throws IOException { in.commit(commitUserData); }
+ protected void doCommit(Map commitUserData) throws IOException { in.commit(commitUserData); }
protected void doClose() throws IOException { in.close(); }
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 777664)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -27,6 +27,7 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Map;
/** IndexReader is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
@@ -525,9 +526,9 @@
/**
* Reads commitUserData, previously passed to {@link
- * IndexWriter#commit(String)}, from current index
+ * IndexWriter#commit(Map)}, from current index
* segments file. This will return null if {@link
- * IndexWriter#commit(String)} has never been called for
+ * IndexWriter#commit(Map)} has never been called for
* this index.
*
* @param directory where the index resides.
@@ -537,7 +538,7 @@
*
* @see #getCommitUserData()
*/
- public static String getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
+ public static Map getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
return SegmentInfos.readCurrentUserData(directory);
}
@@ -552,12 +553,12 @@
/**
- * Retrieve the String userData optionally passed to
+ * Retrieve the Map (String -> String) userData optionally passed to
* IndexWriter#commit. This will return null if {@link
- * IndexWriter#commit(String)} has never been called for
+ * IndexWriter#commit(Map)} has never been called for
* this index.
*
* @see #getCommitUserData(Directory)
*/
- public String getCommitUserData() {
+ public Map getCommitUserData() {
throw new UnsupportedOperationException("This reader does not support this method.");
}
@@ -1017,12 +1018,13 @@
}
/**
- * @param commitUserData Opaque String that's recorded
- * into the segments file in the index, and retrievable
- * by {@link IndexReader#getCommitUserData}.
+ * @param commitUserData Opaque Map (String -> String)
+ * that's recorded into the segments file in the index,
+ * and retrievable by {@link
+ * IndexReader#getCommitUserData}.
* @throws IOException
*/
- public final synchronized void flush(String commitUserData) throws IOException {
+ public final synchronized void flush(Map commitUserData) throws IOException {
ensureOpen();
commit(commitUserData);
}
@@ -1049,7 +1051,7 @@
* (transactional semantics).
* @throws IOException if there is a low-level IO error
*/
- protected final synchronized void commit(String commitUserData) throws IOException {
+ protected final synchronized void commit(Map commitUserData) throws IOException {
if (hasChanges) {
doCommit(commitUserData);
}
@@ -1057,13 +1059,13 @@
}
/** Implements commit.
- * @deprecated Please implement {@link #doCommit(String)
+ * @deprecated Please implement {@link #doCommit(Map)
* instead}. */
protected abstract void doCommit() throws IOException;
/** Implements commit. NOTE: subclasses should override
* this. In 3.0 this will become an abstract method. */
- void doCommit(String commitUserData) throws IOException {
+ void doCommit(Map commitUserData) throws IOException {
// Default impl discards commitUserData; all Lucene
// subclasses override this (do not discard it).
doCommit();
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 777664)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -3718,6 +3718,7 @@
segmentInfos.clear(); // pop old infos & add new
info = new SegmentInfo(mergedName, docCount, directory, false, true,
-1, null, false, merger.hasProx());
+ setDiagnostics(info, "addIndexes(IndexReader[])");
segmentInfos.add(info);
}
@@ -3831,16 +3832,16 @@
* you should immediately close the writer. See above for details.
Expert: prepare for commit, specifying - * commitUserData String. This does the first phase of - * 2-phase commit. You can only call this when - * autoCommit is false. This method does all steps + * commitUserData Map (String -> String). This does the + * first phase of 2-phase commit. You can only call this + * when autoCommit is false. This method does all steps * necessary to commit changes since this writer was * opened: flushes pending added and deleted docs, syncs * the index files, writes most of next segments_N file. @@ -3849,7 +3850,7 @@ * #rollback()} to revert the commit and undo all changes * done since the writer was opened.
* - * You can also just call {@link #commit(String)} directly + * You can also just call {@link #commit(Map)} directly * without prepareCommit first in which case that method * will internally call prepareCommit. * @@ -3857,11 +3858,12 @@ * you should immediately close the writer. See above for details. * - * @param commitUserData Opaque String that's recorded - * into the segments file in the index, and retrievable - * by {@link IndexReader#getCommitUserData}. Note that - * when IndexWriter commits itself, for example if open - * with autoCommit=true, or, during {@link #close}, the + * @param commitUserData Opaque Map (String->String) + * that's recorded into the segments file in the index, + * and retrievable by {@link + * IndexReader#getCommitUserData}. Note that when + * IndexWriter commits itself, for example if open with + * autoCommit=true, or, during {@link #close}, the * commitUserData is unchanged (just carried over from * the prior commit). If this is null then the previous * commitUserData is kept. Also, the commitUserData will @@ -3869,11 +3871,11 @@ * index to commit. Therefore it's best to use this * feature only when autoCommit is false. */ - public final void prepareCommit(String commitUserData) throws CorruptIndexException, IOException { + public final void prepareCommit(Map commitUserData) throws CorruptIndexException, IOException { prepareCommit(commitUserData, false); } - private final void prepareCommit(String commitUserData, boolean internal) throws CorruptIndexException, IOException { + private final void prepareCommit(Map commitUserData, boolean internal) throws CorruptIndexException, IOException { if (hitOOM) { throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit"); @@ -3926,22 +3928,22 @@ * href="#OOME">above for details. 
* * @see #prepareCommit - * @see #commit(String) + * @see #commit(Map) */ public final void commit() throws CorruptIndexException, IOException { commit(null); } /** Commits all changes to the index, specifying a - * commitUserData String. This just calls {@link - * #prepareCommit(String)} (if you didn't already call - * it) and then {@link #finishCommit}. + * commitUserData Map (String -> String). This just + * calls {@link #prepareCommit(Map)} (if you didn't + * already call it) and then {@link #finishCommit}. * *NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See above for details.
*/ - public final void commit(String commitUserData) throws CorruptIndexException, IOException { + public final void commit(Map commitUserData) throws CorruptIndexException, IOException { ensureOpen(); @@ -4136,6 +4138,7 @@ docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter.hasProx()); + setDiagnostics(newSegment, "flush"); } docWriter.pushDeletes(); @@ -4646,6 +4649,13 @@ docStoreIsCompoundFile, false); + + Map details = new HashMap(); + details.put("optimize", merge.optimize+""); + details.put("mergeFactor", end+""); + details.put("mergeDocStores", mergeDocStores+""); + setDiagnostics(merge.info, "merge", details); + // Also enroll the merged segment into mergingSegments; // this prevents it from getting selected for a merge // after our merge is done but while we are building the @@ -4653,6 +4663,25 @@ mergingSegments.add(merge.info); } + private void setDiagnostics(SegmentInfo info, String source) { + setDiagnostics(info, source, null); + } + + private void setDiagnostics(SegmentInfo info, String source, Map details) { + Map diagnostics = new HashMap(); + diagnostics.put("source", source); + diagnostics.put("lucene.version", Constants.LUCENE_VERSION); + diagnostics.put("os", Constants.OS_NAME+""); + diagnostics.put("os.arch", Constants.OS_ARCH+""); + diagnostics.put("os.version", Constants.OS_VERSION+""); + diagnostics.put("java.version", Constants.JAVA_VERSION+""); + diagnostics.put("java.vendor", Constants.JAVA_VENDOR+""); + if (details != null) { + diagnostics.putAll(details); + } + info.setDiagnostics(diagnostics); + } + /** This is called after merging a segment and before * building its CFS. Return true if the files should be * sync'd. If you return false, then the source segment @@ -5142,7 +5171,7 @@ * if it wasn't already. If that succeeds, then we * prepare a new segments_N file but do not fully commit * it. 
*/ - private void startCommit(long sizeInBytes, String commitUserData) throws IOException { + private void startCommit(long sizeInBytes, Map commitUserData) throws IOException { assert testPoint("startStartCommit"); Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 777664) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -585,7 +585,7 @@ long version; long generation; final boolean isOptimized; - final String userData; + final Map userData; public CommitPoint(Collection commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException { this.directory = directory; @@ -625,7 +625,7 @@ return generation; } - public String getUserData() { + public Map getUserData() { return userData; } Index: src/java/org/apache/lucene/store/IndexInput.java =================================================================== --- src/java/org/apache/lucene/store/IndexInput.java (revision 777664) +++ src/java/org/apache/lucene/store/IndexInput.java (working copy) @@ -18,6 +18,8 @@ */ import java.io.IOException; +import java.util.Map; +import java.util.HashMap; /** Abstract base class for input from a file in a {@link Directory}. A * random-access input stream. Used for all Lucene index input operations. @@ -226,4 +228,16 @@ return clone; } + // returns Map+ In a couple places Lucene stores a Map + String->String. +
+ ++ Map<String,String> --> Count<String,String>Count +
+ +@@ -842,21 +857,29 @@ DocStoreOffset, DeletionCount --> Int32
-+
Version, DelGen, NormGen, Checksum --> Int64
- SegName, DocStoreSegment, CommitUserData --> String + SegName, DocStoreSegment --> String
++ Diagnostics --> Map<String,String> +
+IsCompoundFile, HasSingleNormFile, - DocStoreIsCompoundFile, HasProx, HasUserData --> Int8 + DocStoreIsCompoundFile, HasProx --> Int8
++ CommitUserData --> Map<String,String> +
+- Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -8 (SegmentInfos.FORMAT_USER_DATA) as of Lucene 2.9. + Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -9 (SegmentInfos.FORMAT_DIAGNOSTICS) as of Lucene 2.9.
@@ -958,11 +981,18 @@
- If HasUserData is 1, then the string - CommitUserData is non-null and is stored. This is - a string previously passed to IndexWriter's commit - or prepareCommit method. + CommitUserData stores an optional user-supplied + opaque Map<String,String> that was passed to + IndexWriter's commit or prepareCommit, or + IndexReader's flush methods.
++ The Diagnostics Map is privately written by + IndexWriter, as a debugging aid, for each segment + it creates. It includes metadata like the current + Lucene version, OS, Java version, why the segment + was created (merge, flush, addIndexes), etc. +
Cc`IcFI[>!*[q,(C<5FO/G&hoJ'M>&lTf1DK/WXVZg0k@PWoq:JBaalA %#1h63YES#=kMbIF>epV:e0Ro2P.a9*(l;WdG=NW*H[n[kfTdHTX8k=5[)s9$m<
+ In a couple of places Lucene stores a Map of
+ String->String.
+
+ Map<String,String> --> Count<String,String>Count
+
+
The files in this section exist one-per-index.
The active segments in the index are stored in the
@@ -1343,7 +1367,7 @@
2.9 and above:
Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGenNumField,
- IsCompoundFile, DeletionCount, HasProx>SegCount, HasUserData, CommitUserData?, Checksum
+ IsCompoundFile, DeletionCount, HasProx, Diagnostics>SegCount, CommitUserData, Checksum
Format, NameCounter, SegCount, SegSize, NumField,
@@ -1353,16 +1377,22 @@
Version, DelGen, NormGen, Checksum --> Int64
- SegName, DocStoreSegment, CommitUserData --> String
+ SegName, DocStoreSegment --> String
+ Diagnostics --> Map<String,String>
+
IsCompoundFile, HasSingleNormFile,
- DocStoreIsCompoundFile, HasProx, HasUserData --> Int8
+ DocStoreIsCompoundFile, HasProx --> Int8
- Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -8 (SegmentInfos.FORMAT_USER_DATA) as of Lucene 2.9.
+ CommitUserData --> Map<String,String>
+ Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -9 (SegmentInfos.FORMAT_DIAGNOSTICS) as of Lucene 2.9.
+
Version counts how often the index has been
changed by adding or deleting documents.
- If HasUserData is 1, then the string
- CommitUserData is non-null and is stored. This is
- a string previously passed to IndexWriter's commit
- or prepareCommit method.
+ CommitUserData stores an optional user-supplied
+ opaque Map<String,String> that was passed to
+ IndexWriter's commit or prepareCommit, or
+ IndexReader's flush methods.
+ The Diagnostics Map is privately written by
+ IndexWriter, as a debugging aid, for each segment
+ it creates. It includes metadata like the current
+ Lucene version, OS, Java version, why the segment
+ was created (merge, flush, addIndexes), etc.
+
The write lock, which is stored in the index
@@ -1471,7 +1508,7 @@
Note that prior to version 2.1, Lucene also used a
commit lock. This was removed in 2.1.
Prior to Lucene 2.1 there was a file "deletable"
@@ -1480,7 +1517,7 @@
the files that are deletable, instead, so no file
is written.
Starting with Lucene 1.4 the compound file format became default. This
is simply a container for all files described in the next section
@@ -1507,14 +1544,14 @@
The remaining files are all per-segment, and are
thus defined by suffix.
The term dictionary is represented as two files:
@@ -1926,7 +1963,7 @@
-
+
The .frq file contains the lists of documents
@@ -2054,7 +2091,7 @@
entry in level-1. In the example has entry 15 on level 1 a pointer to entry 15 on level 0 and entry 31 on level 1 a pointer
to entry 31 on level 0.
The .prx file contains the lists of positions that
@@ -2124,7 +2161,7 @@
Payload. If PayloadLength is not stored, then this Payload has the same
length as the Payload at the previous position.
@@ -2228,7 +2265,7 @@
2.1 and above:
Separate norm files are created (when adequate) for both compound and non compound segments.
Term Vector support is an optional on a field by
@@ -2361,7 +2398,7 @@
-
+
The .del file is
optional, and only exists when a segment contains deletions.
@@ -2433,7 +2470,7 @@
=X/5U/&O;(.+>L<07VEfq0pd][>*R5o6_Eke>WEsk%rrQ543]/s?(6+/Qg\9c?(HbjIfW@0_<042f6\h/srm`BKD.askKI`'_Uel;Gs/]eO`#C"/Zj*q#JT(;FIg&nsF
endstream
endobj
100 0 obj
@@ -604,10 +604,10 @@
>>
endobj
101 0 obj
-<< /Length 1528 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1395 /Filter [ /ASCII85Decode /FlateDecode ]
>>
stream
-Gau0D95iiK&AJ$C#eB:+L_Z%u(Q0h&mCL@_:!uH!V'uc)U20k9kS5C*rqE612MVq0-ANetCe,"J^3L67)bn%:PFiM!XSBQf-_Y*TL?Ej[\)TAr:L+4ss%6`O-@\d.5>6MhEt-W87Hqa*:KdT#)9MU.5t_D,Kkt1i$dVYLaFai@_X69).69D`-9JaBTX&Ld(;h_\VV8HX,W5V)C;pO;q[UO0E$3O:6Bijd`/c^"ss\'+LWD:8)ugP:@BW;3KpkDja1qUR+o?2o4SSUQ:#.RM^m!ZFp//E5S>a%"5iEM@/,K4#Mu=3?_P`W1LWC7Nf[J-te,=fF:eG!nBg-2C]-@[RfTKco;A[nZ5-H%#a_gHL6IED+cF5hI1b286A4OjaEi^U>25^!maRBKhoLP&]&[GU84OFuYeVlc1hnU]OmAar+9QWCiQkU_=l5AjWR'='L--%Q7QKZKGL>GU@Ds8os_$r5Z;1TDA6EQb19C%(2@0n\/=m!r*e*:.)'KlWGF@NgBKSYAii)jNt].)IBn.8mS85'SsOd=^Td$)M3dXWB@%s0Dc)74*^+mm+>Wo"Y`%15ssnul>SnO,gG368[5;H5it_1o'IARIor<$q2]j>FrfPi:83K^93#Smp1`8>.#Yc15E(9IF$,BhXY1VO2aWa\*tFEP61;R0gob1Hs2!IuL&E*,7G'8.3]LT^@dJ+Y#V#(q&AMnr?)lrc3H*ig&qg+aUcrn7/P+,s>]Co9*MAS,^8rf%7SjNb\naOq_MgK8f%;72crX,nCtgY1\dFs
#HQP4rr7FCT15k22rHKI5#1'o-=*=q5$'k8Isc42%:ctE";<9;f@U3)C1TZ_.0=So>kdF)/#\A2=[+c7[%gR(?:FPPsc>5E
+
+
Compound Types
+Map<String,String>
+Per-Index Files
Segments File
Lock File
Deletable File
Compound Files
Per-Segment Files
Fields
Term Dictionary
Frequencies
Positions
Normalization Factors
Term Vectors
Deleted Documents
Limitations