Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1061495) +++ lucene/CHANGES.txt (working copy) @@ -140,6 +140,9 @@ documents that don't have the field get a norm byte value of 0. Previously, Lucene would populate "fake norms" with Similarity.getDefault() for these documents. (Robert Muir, Mike Mccandless) + +* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather + than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler) API Changes Index: lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 1061495) +++ lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -171,15 +171,7 @@ try { writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()) - .setMergeScheduler(new SerialMergeScheduler()) // no threads! - ); - // TODO: Make IndexWriter fail on open! - if (random.nextBoolean()) { - writer.optimize(); - } else { - reader = writer.getReader(); - } + TEST_VERSION_CURRENT, new MockAnalyzer())); fail("IndexWriter creation should not pass for "+unsupportedNames[i]); } catch (IndexFormatTooOldException e) { // pass @@ -188,17 +180,13 @@ e.printStackTrace(System.out); } } finally { - if (reader != null) reader.close(); - reader = null; + // we should fail to open IW, and so it should be null when we get here. + // However, if the test fails (i.e., IW did not fail on open), we need + // to close IW. However, if merges are run, IW may throw + // IndexFormatTooOldException, and we don't want to mask the fail() + // above, so close without waiting for merges. if (writer != null) { - try { - writer.close(); - } catch (IndexFormatTooOldException e) { - // OK -- since IW gives merge scheduler a chance - // to merge at close, it's possible and fine to - // hit this exc here - writer.close(false); - } + writer.close(false); } writer = null; } Index: lucene/src/java/org/apache/lucene/index/FieldsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldsReader.java (revision 1061495) +++ lucene/src/java/org/apache/lucene/index/FieldsReader.java (working copy) @@ -37,8 +37,10 @@ * Class responsible for access to stored document fields. *
* It uses <segment>.fdt and <segment>.fdx; files. + * + * @lucene.internal */ -final class FieldsReader implements Cloneable { +public final class FieldsReader implements Cloneable { private final static int FORMAT_SIZE = 4; private final FieldInfos fieldInfos; @@ -74,7 +76,24 @@ ensureOpen(); return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream); } + + /** Verifies that the code version which wrote the segment is supported. */ + public static void checkCodeVersion(Directory dir, String segment) throws IOException { + final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION); + IndexInput idxStream = dir.openInput(indexStreamFN, 1024); + + try { + int format = idxStream.readInt(); + if (format < FieldsWriter.FORMAT_MINIMUM) + throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT); + if (format > FieldsWriter.FORMAT_CURRENT) + throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT); + } finally { + idxStream.close(); + } + } + // Used only by clone private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) { @@ -89,11 +108,11 @@ indexStream = (IndexInput) cloneableIndexStream.clone(); } - FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { + public FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0); } - FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException { + public FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException { boolean success = false; isOriginal = true; try { @@ -157,7 +176,7 @@ * * @throws IOException */ - final void close() throws IOException { + public final void close() throws IOException { if (!closed) { if (fieldsStream != null) { fieldsStream.close(); @@ -178,7 +197,7 @@ } } - final int size() { + public final int size() { return size; } @@ -186,7 +205,7 @@ indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); } - final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { seekIndex(n); long position = indexStream.readLong(); fieldsStream.seek(position); @@ -237,7 +256,7 @@ * contiguous range of length numDocs starting with * startDocID. Returns the IndexInput (the fieldStream), * already seeked to the starting point for startDocID.*/ - final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException { + public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException { seekIndex(startDocID); long startOffset = indexStream.readLong(); long lastOffset = startOffset; Index: lucene/src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1061495) +++ lucene/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Constants; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; @@ -87,6 +88,10 @@ private Map+ * NOTE: this method is used for internal purposes only - you should + * not modify the version of a SegmentInfo, or it may result in unexpected + * exceptions thrown when you attempt to open the index. + * + * @lucene.internal + */ + public void setVersion(String version) { + this.version = version; + } + + /** Returns the version of the code which wrote the segment. */ + public String getVersion() { + return version; + } + } Index: lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java (revision 1061495) +++ lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java (working copy) @@ -23,10 +23,15 @@ */ public class IndexFormatTooOldException extends CorruptIndexException { + public IndexFormatTooOldException(String filename, String version) { + super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") + + ": " + version + ". This version of Lucene only supports indexes created with release 3.0 and later."); + } + public IndexFormatTooOldException(String filename, int version, int minVersion, int maxVersion) { super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") + - ": " + version + " (needs to be between " + minVersion + " and " + maxVersion + - "). This version of Lucene only supports indexes created with release 3.0 and later."); + ": " + version + " (needs to be between " + minVersion + " and " + maxVersion + + "). This version of Lucene only supports indexes created with release 3.0 and later."); } } Index: lucene/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentReader.java (revision 1061495) +++ lucene/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -226,6 +226,7 @@ assert storeDir != null; } + // nocommit: this can be simplified to always be si.getDocStoreSegment() final String storesSegment; if (si.getDocStoreOffset() != -1) { storesSegment = si.getDocStoreSegment(); Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision 1061495) +++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -605,8 +605,6 @@ } } - - /** * Obtain the number of deleted docs for a pooled reader. * If the reader isn't being pooled, the segmentInfo's @@ -715,11 +713,8 @@ boolean success = false; - // TODO: we should check whether this index is too old, - // and throw an IndexFormatTooOldExc up front, here, - // instead of later when merge, applyDeletes, getReader - // is attempted. I think to do this we should store the - // oldest segment's version in segments_N. + // If index is too old, reading the segments will throw + // IndexFormatTooOldException. segmentInfos = new SegmentInfos(codecs); try { if (create) { @@ -982,6 +977,7 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ + @Override public void close() throws CorruptIndexException, IOException { close(true); } Index: lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java (revision 1061495) +++ lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java (working copy) @@ -38,9 +38,12 @@ /** Each segment records whether it has term vectors */ public static final int FORMAT_HAS_VECTORS = -10; + /** Each segment records the Lucene version that created it. */ + public static final int FORMAT_3_1 = -11; + /** Each segment records whether its postings are written * in the new flex format */ - public static final int FORMAT_4_0 = -11; + public static final int FORMAT_4_0 = -12; /** This must always point to the most recent file format. * whenever you add a new format, make it 1 smaller (negative version logic)! */ Index: lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java (revision 1061495) +++ lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java (working copy) @@ -19,7 +19,10 @@ import java.io.IOException; +import org.apache.lucene.index.CompoundFileReader; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldsReader; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.SegmentInfo; @@ -55,7 +58,41 @@ infos.counter = input.readInt(); // read counter for (int i = input.readInt(); i > 0; i--) { // read segmentInfos - infos.add(new SegmentInfo(directory, format, input, codecs)); + SegmentInfo si = new SegmentInfo(directory, format, input, codecs); + if (si.getVersion() == null) { + // Could be a 3.0 - try to open the doc stores - if it fails, it's a + // 2.x segment, and an IndexFormatTooOldException will be thrown, + // which is what we want. + Directory dir = directory; + if (si.getDocStoreOffset() != -1) { + if (si.getDocStoreIsCompoundFile()) { + dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName( + si.getDocStoreSegment(), "", + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), 1024); + } + } else if (si.getUseCompoundFile()) { + dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName( + si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), 1024); + } + + try { + FieldsReader.checkCodeVersion(dir, si.getDocStoreSegment()); + } finally { + // If we opened the directory, close it + if (dir != directory) dir.close(); + } + + // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next + // time the segment is read, its version won't be null and we won't + // need to open FieldsReader every time for each such segment. + si.setVersion("3.0"); + } else if (si.getVersion().equals("2.x")) { + // If it's a 3x index touched by 3.1+ code, then segments record their + // version, whether they are 2.x ones or not. We detect that and throw + // appropriate exception. + throw new IndexFormatTooOldException(si.name, si.getVersion()); + } + infos.add(si); } infos.userData = input.readStringStringMap(); Index: lucene/src/java/org/apache/lucene/util/Constants.java =================================================================== --- lucene/src/java/org/apache/lucene/util/Constants.java (revision 1061495) +++ lucene/src/java/org/apache/lucene/util/Constants.java (working copy) @@ -70,6 +70,9 @@ return s.toString(); } + // NOTE: we track per-segment version as a String with the "X.Y" format, e.g. + // "4.0", "3.1", "3.0". Therefore when we change this constant, we should keep + // the format. public static final String LUCENE_MAIN_VERSION = ident("4.0"); public static final String LUCENE_VERSION;