Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 949382)
+++ lucene/CHANGES.txt (working copy)
@@ -74,6 +74,10 @@
   character. Furthermore, the rest of the automaton package and
   RegexpQuery use true Unicode codepoint representation. (Robert Muir,
   Mike McCandless)
 
+* LUCENE-2480: Though not a change in backwards compatibility policy, pre-3.0
+  indexes are no longer supported. You should upgrade to 3.x first, then run
+  optimize(), or reindex. (Shai Erera)
+
 Changes in runtime behavior
 
 * LUCENE-2421: NativeFSLockFactory does not throw LockReleaseFailedException if
Index: lucene/src/java/org/apache/lucene/index/CheckIndex.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/CheckIndex.java (revision 949382)
+++ lucene/src/java/org/apache/lucene/index/CheckIndex.java (working copy)
@@ -17,6 +17,7 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -342,33 +343,13 @@
     String sFormat = "";
     boolean skip = false;
 
-    if (format == SegmentInfos.FORMAT)
-      sFormat = "FORMAT [Lucene Pre-2.1]";
-    if (format == SegmentInfos.FORMAT_LOCKLESS)
-      sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
-    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
-      sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
-    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
-      sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
-    else {
-      if (format == SegmentInfos.FORMAT_CHECKSUM)
-        sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
-      else if (format == SegmentInfos.FORMAT_DEL_COUNT)
-        sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
-      else if (format == SegmentInfos.FORMAT_HAS_PROX)
-        sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
-      else if (format == SegmentInfos.FORMAT_USER_DATA)
-        sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
-      else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
-        sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
-      else if (format == SegmentInfos.FORMAT_FLEX_POSTINGS)
-        sFormat = "FORMAT_FLEX_POSTINGS [Lucene 3.1]";
-      else if (format < SegmentInfos.CURRENT_FORMAT) {
-        sFormat = "int=" + format + " [newer version of Lucene than this tool]";
-        skip = true;
-      } else {
-        sFormat = format + " [Lucene 1.3 or prior]";
-      }
+    if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
+      sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
+    else if (format == SegmentInfos.FORMAT_FLEX_POSTINGS)
+      sFormat = "FORMAT_FLEX_POSTINGS [Lucene 4.0]";
+    else if (format < SegmentInfos.CURRENT_FORMAT) {
+      sFormat = "int=" + format + " [newer version of Lucene than this tool]";
+      skip = true;
     }
 
     result.segmentsFileName = segmentsFileName;
@@ -656,7 +637,7 @@
           int lastDoc = -1;
           while(true) {
             final int doc = docs2.nextDoc();
-            if (doc == DocsEnum.NO_MORE_DOCS) {
+            if (doc == DocIdSetIterator.NO_MORE_DOCS) {
               break;
             }
             final int freq = docs2.freq();
@@ -698,7 +679,7 @@
         if (reader.hasDeletions()) {
           final DocsEnum docsNoDel = terms.docs(null, docs);
           int count = 0;
-          while(docsNoDel.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+          while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
             count++;
           }
           if (count != docFreq) {
Index: lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 949382)
+++ lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy)
@@ -639,10 +639,11 @@
     consumer.flush(threads, flushState);
 
     if (infoStream != null) {
-      SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory, flushState.codec);
-      si.setHasProx(hasProx());
+      SegmentInfo si = new SegmentInfo(flushState.segmentName,
+          flushState.numDocs, directory, false, -1, flushState.segmentName,
+          false, hasProx(), flushState.codec);
       final long newSegmentSize = si.sizeInBytes();
-      String message = "  ramUsed=" + nf.format(((double) numBytesUsed)/1024./1024.) + " MB" +
+      String message = "  ramUsed=" + nf.format(numBytesUsed/1024./1024.) + " MB" +
         " newFlushedSize=" + newSegmentSize +
         " docs/MB=" + nf.format(numDocsInRAM/(newSegmentSize/1024./1024.)) +
         " new/old=" + nf.format(100.0*newSegmentSize/numBytesUsed) + "%";
Index: lucene/src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/FieldInfos.java (revision 949382)
+++ lucene/src/java/org/apache/lucene/index/FieldInfos.java (working copy)
@@ -36,9 +36,6 @@
  */
 public final class FieldInfos {
 
-  // Used internally (ie not written to *.fnm files) for pre-2.9 files
-  public static final int FORMAT_PRE = -1;
-
   // First used in 2.9; prior to 2.9 there was no format header
   public static final int FORMAT_START = -2;
 
@@ -68,29 +65,7 @@
   FieldInfos(Directory d, String name) throws IOException {
     IndexInput input = d.openInput(name);
     try {
-      try {
-        read(input, name);
-      } catch (IOException ioe) {
-        if (format == FORMAT_PRE) {
-          // LUCENE-1623: FORMAT_PRE (before there was a
-          // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
-          // encoding; retry with input set to pre-utf8
-          input.seek(0);
-          input.setModifiedUTF8StringsMode();
-          byNumber.clear();
-          byName.clear();
-          try {
-            read(input, name);
-          } catch (Throwable t) {
-            // Ignore any new exception & throw original IOE
-            throw ioe;
-          }
-        } else {
-          // The IOException cannot be caused by
-          // LUCENE-1623, so re-throw it
-          throw ioe;
-        }
-      }
+      read(input, name);
     } finally {
       input.close();
     }
@@ -330,25 +305,13 @@
   }
 
   private void read(IndexInput input, String fileName) throws IOException {
-    int firstInt = input.readVInt();
+    format = input.readVInt();
 
-    if (firstInt < 0) {
-      // This is a real format
-      format = firstInt;
-    } else {
-      format = FORMAT_PRE;
-    }
-
-    if (format != FORMAT_PRE & format != FORMAT_START) {
+    if (format > FORMAT_START) {
       throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
     }
 
-    int size;
-    if (format == FORMAT_PRE) {
-      size = firstInt;
-    } else {
-      size = input.readVInt(); //read in the size
-    }
+    final int size = input.readVInt(); //read in the size
 
     for (int i = 0; i < size; i++) {
       String name = StringHelper.intern(input.readString());
Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision 949382)
+++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -2969,8 +2969,8 @@
 
       SegmentInfo info = null;
       synchronized(this) {
-        info = new SegmentInfo(mergedName, docCount, directory, false, true,
-                               -1, null, false, merger.hasProx(), merger.getCodec());
+        info = new SegmentInfo(mergedName, docCount, directory, false, -1,
+                               null, false, merger.hasProx(), merger.getCodec());
         setDiagnostics(info, "addIndexes(IndexReader...)");
         segmentInfos.add(info);
         checkpoint();
@@ -3335,10 +3335,9 @@
         // successfully.
 
         newSegment = new SegmentInfo(segment,
                                      flushedDocCount,
-                                     directory, false, true,
-                                     docStoreOffset, docStoreSegment,
-                                     docStoreIsCompoundFile,
-                                     docWriter.hasProx(),
+                                     directory, false, docStoreOffset,
+                                     docStoreSegment, docStoreIsCompoundFile,
+                                     docWriter.hasProx(),
                                      docWriter.getCodec());
 
         setDiagnostics(newSegment, "flush");
@@ -3853,8 +3852,7 @@
     // ConcurrentMergePolicy we keep deterministic segment
     // names.
     merge.info = new SegmentInfo(newSegmentName(), 0,
-                                 directory, false, true,
-                                 docStoreOffset,
+                                 directory, false, docStoreOffset,
                                  docStoreSegment,
                                  docStoreIsCompoundFile,
                                  false,
Index: lucene/src/java/org/apache/lucene/index/SegmentInfo.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/SegmentInfo.java (revision 949382)
+++ lucene/src/java/org/apache/lucene/index/SegmentInfo.java (working copy)
@@ -42,41 +42,30 @@
 
   static final int NO = -1;          // e.g. no norms; no deletes;
   static final int YES = 1;          // e.g. have norms; have deletes;
-  static final int CHECK_DIR = 0;    // e.g. must check dir to see if there are norms/deletions
   static final int WITHOUT_GEN = 0;  // a file name that has no GEN in it.
 
   public String name;     // unique name in dir
   public int docCount;    // number of docs in seg
   public Directory dir;   // where segment resides
 
-  private boolean preLockless; // true if this is a segments file written before
-                               // lock-less commits (2.1)
+  /*
+   * Current generation of del file:
+   * - NO if there are no deletes
+   * - YES or higher if there are deletes at generation N
+   */
+  private long delGen;
 
-  private long delGen; // current generation of del file; NO if there
-                       // are no deletes; CHECK_DIR if it's a pre-2.1 segment
-                       // (and we must check filesystem); YES or higher if
-                       // there are deletes at generation N
-
-  private long[] normGen; // current generation of each field's norm file.
-                          // If this array is null, for lockLess this means no
-                          // separate norms.  For preLockLess this means we must
-                          // check filesystem. If this array is not null, its
-                          // values mean: NO says this field has no separate
-                          // norms; CHECK_DIR says it is a preLockLess segment and
-                          // filesystem must be checked; >= YES says this field
-                          // has separate norms with the specified generation
+  /*
+   * Current generation of each field's norm file. If this array is null,
+   * means no separate norms. If this array is not null, its values mean:
+   * - NO says this field has no separate norms
+   * >= YES says this field has separate norms with the specified generation
+   */
+  private long[] normGen;
 
-  private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
-                               // pre-2.1 (ie, must check file system to see
-                               // if .cfs and .nrm exist)
+  private boolean isCompoundFile;
 
-  private boolean hasSingleNormFile; // true if this segment maintains norms in a single file;
-                                     // false otherwise
-                                     // this is currently false for segments populated by DocumentWriter
-                                     // and true for newly created merged segments (both
-                                     // compound and non compound).
-
-  private List<String> files; // cached list of files that this segment uses
+  private List<String> files; // cached list of files that this segment uses
                               // in the Directory
 
   long sizeInBytes = -1;      // total byte size of all of our files (computed on demand)
@@ -97,29 +86,13 @@
 
   private Map<String,String> diagnostics;
 
-  public SegmentInfo(String name, int docCount, Directory dir, Codec codec) {
+  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, int docStoreOffset,
+                     String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx, Codec codec) {
     this.name = name;
     this.docCount = docCount;
    this.dir = dir;
     delGen = NO;
-    isCompoundFile = CHECK_DIR;
-    preLockless = true;
-    hasSingleNormFile = false;
-    docStoreOffset = -1;
-    docStoreSegment = name;
-    docStoreIsCompoundFile = false;
-    delCount = 0;
-    hasProx = true;
-    this.codec = codec;
-  }
-
-  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile,
-                     int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx,
-                     Codec codec) {
-    this(name, docCount, dir, codec);
-    this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
-    this.hasSingleNormFile = hasSingleNormFile;
-    preLockless = false;
+    this.isCompoundFile = isCompoundFile;
     this.docStoreOffset = docStoreOffset;
     this.docStoreSegment = docStoreSegment;
     this.docStoreIsCompoundFile = docStoreIsCompoundFile;
@@ -137,7 +110,6 @@
     name = src.name;
     docCount = src.docCount;
     dir = src.dir;
-    preLockless = src.preLockless;
     delGen = src.delGen;
     docStoreOffset = src.docStoreOffset;
     docStoreIsCompoundFile = src.docStoreIsCompoundFile;
@@ -148,7 +120,6 @@
       System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
     }
     isCompoundFile = src.isCompoundFile;
-    hasSingleNormFile = src.hasSingleNormFile;
     delCount = src.delCount;
     codec = src.codec;
   }
@@ -174,72 +145,43 @@
     name = input.readString();
     docCount = input.readInt();
     final String codecName;
-    if (format <= SegmentInfos.FORMAT_LOCKLESS) {
-      delGen = input.readLong();
-      if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) {
-        docStoreOffset = input.readInt();
-        if (docStoreOffset != -1) {
-          docStoreSegment = input.readString();
-          docStoreIsCompoundFile = (1 == input.readByte());
-        } else {
-          docStoreSegment = name;
-          docStoreIsCompoundFile = false;
-        }
-      } else {
-        docStoreOffset = -1;
-        docStoreSegment = name;
-        docStoreIsCompoundFile = false;
-      }
+    delGen = input.readLong();
+    docStoreOffset = input.readInt();
+    if (docStoreOffset != -1) {
+      docStoreSegment = input.readString();
+      docStoreIsCompoundFile = (1 == input.readByte());
+    } else {
+      docStoreSegment = name;
+      docStoreIsCompoundFile = false;
+    }
+    // single norms file
+    assert 1 == input.readByte();
+    int numNormGen = input.readInt();
+    if (numNormGen == NO) {
+      normGen = null;
+    } else {
+      normGen = new long[numNormGen];
+      for(int j=0;j<numNormGen;j++) {
+        normGen[j] = input.readLong();
+      }
+    }
+    isCompoundFile = input.readByte() == YES;
+    delCount = input.readInt();
+    assert delCount <= docCount;
-      if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) {
-        diagnostics = input.readStringStringMap();
-      } else {
-        diagnostics = Collections.<String,String>emptyMap();
-      }
-    } else {
-      delGen = CHECK_DIR;
-      normGen = null;
-      isCompoundFile = CHECK_DIR;
-      preLockless = true;
-      hasSingleNormFile = false;
-      docStoreOffset = -1;
-      docStoreIsCompoundFile = false;
-      docStoreSegment = null;
-      delCount = -1;
-      hasProx = true;
+    hasProx = input.readByte() == 1;
+
+    // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
+
+    if (format <= SegmentInfos.FORMAT_FLEX_POSTINGS)
+      codecName = input.readString();
+    else codecName = "PreFlex";
+
+    if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) {
+      diagnostics = input.readStringStringMap();
+    } else {
+      diagnostics = Collections.<String,String>emptyMap();
     }
 
     codec = codecs.lookup(codecName);
@@ -252,16 +194,10 @@
       // norms set against it yet:
       normGen = new long[numFields];
 
-      if (preLockless) {
-        // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know
-        // we have to check filesystem for norm files, because this is prelockless.
-
-      } else {
-        // This is a FORMAT_LOCKLESS segment, which means
-        // there are no separate norms:
-        for(int i=0;i<numFields;i++) {
-          normGen[i] = NO;
-        }
+      for(int i=0;i<numFields;i++) {
+        normGen[i] = NO;
       }
     }
   }
@@ -296,22 +232,12 @@
   public boolean hasDeletions()
     throws IOException {
     // Cases:
     //
     //   delGen == NO: this means this segment does not have deletions
-    //   delGen == CHECK_DIR: this means this segment was written by
-    //     pre-LOCKLESS code which means we must check
-    //     filesystem to see if .del file exists
    //   delGen >= YES: this means this segment was written by
     //     the LOCKLESS code and for certain has
     //     deletions
     //
-    if (delGen == NO) {
-      return false;
-    } else if (delGen >= YES) {
-      return true;
-    } else {
-      return dir.fileExists(getDelFileName());
-    }
+    return delGen != NO;
   }
 
   void advanceDelGen() {
@@ -325,14 +251,12 @@
   }
 
   @Override
-  public Object clone () {
-    SegmentInfo si = new SegmentInfo(name, docCount, dir, codec);
+  public Object clone() {
+    SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, hasProx, codec);
     si.isCompoundFile = isCompoundFile;
     si.delGen = delGen;
     si.delCount = delCount;
     si.hasProx = hasProx;
-    si.preLockless = preLockless;
-    si.hasSingleNormFile = hasSingleNormFile;
     si.diagnostics = new HashMap<String,String>(diagnostics);
     if (normGen != null) {
       si.normGen = normGen.clone();
     }
@@ -360,17 +284,8 @@
    *
    * @param fieldNumber the field index to check
    */
-  public boolean hasSeparateNorms(int fieldNumber)
-    throws IOException {
-    if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) {
-      // Must fallback to directory file exists check:
-      String fileName = name + ".s" + fieldNumber;
-      return dir.fileExists(fileName);
-    } else if (normGen == null || normGen[fieldNumber] == NO) {
-      return false;
-    } else {
-      return true;
-    }
+  public boolean hasSeparateNorms(int fieldNumber) throws IOException {
+    return !(normGen == null || normGen[fieldNumber] == NO);
   }
 
   /**
@@ -379,45 +294,16 @@
   public boolean hasSeparateNorms()
     throws IOException {
     if (normGen == null) {
-      if (!preLockless) {
-        // This means we were created w/ LOCKLESS code and no
-        // norms are written yet:
-        return false;
-      } else {
-        // This means this segment was saved with pre-LOCKLESS
-        // code.  So we must fallback to the original
-        // directory list check:
-        String[] result = dir.listAll();
-        if (result == null)
-          throw new IOException("cannot read directory " + dir + ": listAll() returned null");
-
-        final String pattern = name + ".s\\d+";
-        for(int i = 0; i < result.length; i++){
-          String fileName = result[i];
-          if (fileName.matches(pattern)) {
-            return true;
-          }
-        }
-        return false;
-      }
+      return false;
     } else {
       // This means this segment was saved with LOCKLESS
       // code so we first check whether any normGen's are >= 1
       // (meaning they definitely have separate norms):
-
       for(int i=0;i<normGen.length;i++) {
        if (normGen[i] >= YES) {
          return true;
        }
      }
-
-      // Next we look for any == 0.  These cases were
-      // pre-LOCKLESS and must be checked in directory:
-      for(int i=0;i<normGen.length;i++) {
-        if (normGen[i] == CHECK_DIR) {
-          if (hasSeparateNorms(i)) {
-            return true;
-          }
-        }
-      }
     }
 
     return false;
   }
     files = new ArrayList<String>(fileSet);
Index: lucene/src/java/org/apache/lucene/index/SegmentInfos.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/SegmentInfos.java (revision 949382)
+++ lucene/src/java/org/apache/lucene/index/SegmentInfos.java (working copy)
@@ -45,47 +45,17 @@
  */
 public final class SegmentInfos extends Vector<SegmentInfo> {
 
-  /** The file format version, a negative number. */
-  /* Works since counter, the old 1st entry, is always >= 0 */
-  public static final int FORMAT = -1;
-
-  /** This format adds details used for lockless commits.  It differs
-   * slightly from the previous format in that file names
-   * are never re-used (write once).  Instead, each file is
-   * written to the next generation.  For example,
-   * segments_1, segments_2, etc.  This allows us to not use
-   * a commit lock.  See file
-   * formats for details.
-   */
-  public static final int FORMAT_LOCKLESS = -2;
-
-  /** This format adds a "hasSingleNormFile" flag into each segment info.
-   * See LUCENE-756
-   * for details.
-   */
-  public static final int FORMAT_SINGLE_NORM_FILE = -3;
-
-  /** This format allows multiple segments to share a single
-   * vectors and stored fields file. */
-  public static final int FORMAT_SHARED_DOC_STORE = -4;
-
-  /** This format adds a checksum at the end of the file to
-   * ensure all bytes were successfully written. */
-  public static final int FORMAT_CHECKSUM = -5;
-
-  /** This format adds the deletion count for each segment.
-   * This way IndexWriter can efficiently report numDocs(). */
-  public static final int FORMAT_DEL_COUNT = -6;
-
-  /** This format adds the boolean hasProx to record if any
-   * fields in the segment store prox information (ie, have
-   * omitTermFreqAndPositions==false) */
-  public static final int FORMAT_HAS_PROX = -7;
-
-  /** This format adds optional commit userData (String) storage. */
-  public static final int FORMAT_USER_DATA = -8;
-
+  /*
+   * The file format version, a negative number.
+   *
+   * NOTE: future format numbers must always be one smaller
+   * than the latest. With time, support for old formats will
+   * be removed, however the numbers should continue to decrease.
+   */
+
+  /** Used for the segments.gen file only! */
+  public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2;
+
   /** This format adds optional per-segment String
    *  diagnostics storage, and switches userData to Map<String,String> */
   public static final int FORMAT_DIAGNOSTICS = -9;
@@ -98,6 +68,7 @@
   static final int CURRENT_FORMAT = FORMAT_FLEX_POSTINGS;
 
   public int counter = 0;    // used to name new segments
+
   /**
    * counts how often the index has been changed by adding or deleting docs.
    * starting with the current time in milliseconds forces to create unique version numbers.
@@ -270,24 +241,15 @@
           version = input.readLong(); // read version
         }
 
-        if (format <= FORMAT_USER_DATA) {
-          if (format <= FORMAT_DIAGNOSTICS) {
-            userData = input.readStringStringMap();
-          } else if (0 != input.readByte()) {
-            userData = Collections.singletonMap("userData", input.readString());
-          } else {
-            userData = Collections.<String,String>emptyMap();
-          }
-        } else {
-          userData = Collections.<String,String>emptyMap();
+        if (format <= FORMAT_DIAGNOSTICS) {
+          userData = input.readStringStringMap();
         }
 
-        if (format <= FORMAT_CHECKSUM) {
-          final long checksumNow = input.getChecksum();
-          final long checksumThen = input.readLong();
-          if (checksumNow != checksumThen)
-            throw new CorruptIndexException("checksum mismatch in segments file");
-        }
+        final long checksumNow = input.getChecksum();
+        final long checksumThen = input.readLong();
+        if (checksumNow != checksumThen)
+          throw new CorruptIndexException("checksum mismatch in segments file");
+
         success = true;
       } finally {
@@ -612,7 +574,7 @@
       if (genInput != null) {
         try {
           int version = genInput.readInt();
-          if (version == FORMAT_LOCKLESS) {
+          if (version == FORMAT_SEGMENTS_GEN_CURRENT) {
             long gen0 = genInput.readLong();
             long gen1 = genInput.readLong();
             if (infoStream != null) {
@@ -858,9 +820,7 @@
     // logic in SegmentInfos to kick in and load the last
     // good (previous) segments_N-1 file.
-    final String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
-                                                                  "",
-                                                                  generation);
+    final String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
     success = false;
     try {
       dir.sync(Collections.singleton(fileName));
@@ -880,7 +840,7 @@
     try {
       IndexOutput genOutput = dir.createOutput(IndexFileNames.SEGMENTS_GEN);
       try {
-        genOutput.writeInt(FORMAT_LOCKLESS);
+        genOutput.writeInt(FORMAT_SEGMENTS_GEN_CURRENT);
         genOutput.writeLong(generation);
         genOutput.writeLong(generation);
       } finally {
Index: lucene/src/java/org/apache/lucene/index/TermVectorsReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/TermVectorsReader.java (revision 949382)
+++ lucene/src/java/org/apache/lucene/index/TermVectorsReader.java (working copy)
@@ -29,11 +29,7 @@
 
   // NOTE: if you make a new format, it must be larger than
   // the current format
-  static final int FORMAT_VERSION = 2;
 
-  // Changes to speed up bulk merging of term vectors:
-  static final int FORMAT_VERSION2 = 3;
-
   // Changed strings to UTF8 with length-in-bytes not length-in-chars
   static final int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
 
@@ -87,13 +83,8 @@
         assert format == tvdFormat;
         assert format == tvfFormat;
 
-        if (format >= FORMAT_VERSION2) {
-          assert (tvx.length()-FORMAT_SIZE) % 16 == 0;
-          numTotalDocs = (int) (tvx.length() >> 4);
-        } else {
-          assert (tvx.length()-FORMAT_SIZE) % 8 == 0;
-          numTotalDocs = (int) (tvx.length() >> 3);
-        }
+        assert (tvx.length()-FORMAT_SIZE) % 16 == 0;
+        numTotalDocs = (int) (tvx.length() >> 4);
 
         if (-1 == docStoreOffset) {
           this.docStoreOffset = 0;
@@ -134,10 +125,7 @@
   }
 
   final private void seekTvx(final int docNum) throws IOException {
-    if (format < FORMAT_VERSION2)
-      tvx.seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE);
-    else
-      tvx.seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
+    tvx.seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
   }
 
   boolean canReadRawDocs() {
@@ -160,7 +148,7 @@
 
     // SegmentMerger calls canReadRawDocs() first and should
     // not call us if that returns false.
-    if (format < FORMAT_VERSION2)
+    if (format < FORMAT_UTF8_LENGTH_IN_BYTES)
       throw new IllegalStateException("cannot read raw docs with older term vector formats");
 
     seekTvx(startDocID);
@@ -242,11 +230,7 @@
     int number = 0;
     int found = -1;
     for (int i = 0; i < fieldCount; i++) {
-      if (format >= FORMAT_VERSION)
-        number = tvd.readVInt();
-      else
-        number += tvd.readVInt();
-
+      number = tvd.readVInt();
       if (number == fieldNumber)
         found = i;
     }
@@ -255,11 +239,7 @@
     // document
     if (found != -1) {
       // Compute position in the tvf file
-      long position;
-      if (format >= FORMAT_VERSION2)
-        position = tvx.readLong();
-      else
-        position = tvd.readVLong();
+      long position = tvx.readLong();
       for (int i = 1; i <= found; i++)
         position += tvd.readVLong();
 
@@ -297,11 +277,7 @@
     String[] fields = new String[fieldCount];
 
     for (int i = 0; i < fieldCount; i++) {
-      if (format >= FORMAT_VERSION)
-        number = tvd.readVInt();
-      else
-        number += tvd.readVInt();
-
+      number = tvd.readVInt();
       fields[i] = fieldInfos.fieldName(number);
     }
 
@@ -312,11 +288,7 @@
   // tvx/tvd to the right point
   final private long[] readTvfPointers(int fieldCount) throws IOException {
     // Compute position in the tvf file
-    long position;
-    if (format >= FORMAT_VERSION2)
-      position = tvx.readLong();
-    else
-      position = tvd.readVLong();
+    long position = tvx.readLong();
 
     long[] tvfPointers = new long[fieldCount];
     tvfPointers[0] = position;
@@ -425,16 +397,10 @@
     boolean storePositions;
     boolean storeOffsets;
 
-    if (format >= FORMAT_VERSION){
-      byte bits = tvf.readByte();
-      storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
-      storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
-    }
-    else{
-      tvf.readVInt();
-      storePositions = false;
-      storeOffsets = false;
-    }
+    byte bits = tvf.readByte();
+    storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+    storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
+
     mapper.setExpectations(field, numTerms, storeOffsets, storePositions);
 
     int start = 0;
     int deltaLength = 0;
Index: lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 949382)
+++ lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy)
@@ -22,14 +22,11 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.ByteArrayInputStream;
-import java.io.DataInputStream;
 import java.io.OutputStream;
 import java.util.Arrays;
 import java.util.Random;
 import java.util.Enumeration;
 import java.util.List;
-import java.util.ArrayList;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
@@ -37,8 +34,6 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.document.NumericField;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -50,13 +45,12 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.BytesRef;
 
 /*
-  Verify we can read the pre-2.1 file format, do searches
+  Verify we can read the pre-4.0 file format, do searches
   against it, and add documents to it.
 */
 
@@ -128,94 +122,13 @@
   }
   */
 
-  final String[] oldNames = {"19.cfs",
-                             "19.nocfs",
-                             "20.cfs",
-                             "20.nocfs",
-                             "21.cfs",
-                             "21.nocfs",
-                             "22.cfs",
-                             "22.nocfs",
-                             "23.cfs",
-                             "23.nocfs",
-                             "24.cfs",
-                             "24.nocfs",
-                             "29.cfs",
-                             "29.nocfs",
-                             "30.cfs",
+  final String[] oldNames = {"30.cfs",
                              "30.nocfs",
                              "31.cfs",
                              "31.nocfs",
   };
 
-  private void assertCompressedFields29(Directory dir, boolean shouldStillBeCompressed) throws IOException {
-    int count = 0;
-    final int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2;
-    // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
-    final int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.length;
-
-    IndexReader reader = IndexReader.open(dir, true);
-    try {
-      // look into sub readers and check if raw merge is on/off
-      List<IndexReader> readers = new ArrayList<IndexReader>();
-      ReaderUtil.gatherSubReaders(readers, reader);
-      for (IndexReader ir : readers) {
-        final FieldsReader fr = ((SegmentReader) ir).getFieldsReader();
-        assertTrue("for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index",
-          shouldStillBeCompressed != fr.canReadRawDocs());
-      }
-
-      // test that decompression works correctly
-      for(int i=0; i<reader.maxDoc(); i++) {
-        if (!reader.isDeleted(i)) {
-          Document d = reader.document(i, new FieldSelector() {
-            public FieldSelectorResult accept(String fieldName) {
-              return ("compressed".equals(fieldName)) ? FieldSelectorResult.SIZE : FieldSelectorResult.LOAD;
-            }
-          });
-          if (d.get("content3") != null) continue;
-          count++;
-          // read the size from the binary value using DataInputStream:
-          final DataInputStream ds = new DataInputStream(new ByteArrayInputStream(d.getFieldable("compressed").getBinaryValue()));
-          final int actualSize = ds.readInt();
-          ds.close();
-          final int compressedSize = Integer.parseInt(d.get("compressedSize"));
-          final boolean binary = Integer.parseInt(d.get("id")) % 2 > 0;
-          final int shouldSize = shouldStillBeCompressed ?
-            compressedSize :
-            (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
-          assertEquals("size incorrect", shouldSize, actualSize);
-          if (!shouldStillBeCompressed) {
-            assertFalse("uncompressed field should have another size than recorded in index", compressedSize == actualSize);
-          }
-        }
-      }
-      assertEquals("correct number of tests", 34 * 2, count);
-    } finally {
-      reader.close();
-    }
-  }
-
   public void testOptimizeOldIndex() throws Exception {
-    int hasTested29 = 0;
-
     Random rand = newRandom();
 
     for(int i=0;i<oldNames.length;i++) {
-      // only test indexes >= 3.0
-      if (oldNames[i].compareTo("30.") < 0) continue;
-
       unzip(getDataFile("index." + oldNames[i] + ".zip"), oldNames[i]);
 
       String fullPath = fullDir(oldNames[i]);
Index: lucene/src/test/org/apache/lucene/index/TestCodecs.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestCodecs.java (revision 949382)
+++ lucene/src/test/org/apache/lucene/index/TestCodecs.java (working copy)
@@ -281,7 +281,7 @@
     final Directory dir = new MockRAMDirectory();
     this.write(fieldInfos, dir, fields);
 
-    final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, CodecProvider.getDefault().getWriter(null));
+    final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, -1, SEGMENT, false, true, CodecProvider.getDefault().getWriter(null));
     si.setHasProx(false);
 
     final FieldsProducer reader = si.getCodec().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 64, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR));
@@ -319,7 +319,7 @@
     final Directory dir = new MockRAMDirectory();
     this.write(fieldInfos, dir, fields);
 
-    final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, CodecProvider.getDefault().getWriter(null));
+    final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, -1, SEGMENT, false, true, CodecProvider.getDefault().getWriter(null));
 
     final FieldsProducer terms = si.getCodec().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR));
Index: lucene/src/test/org/apache/lucene/index/TestDoc.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestDoc.java (revision 949382)
+++ lucene/src/test/org/apache/lucene/index/TestDoc.java (working copy)
@@ -194,8 +194,7 @@
       merger.closeReaders();
 
       final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir,
-                                               useCompoundFile, true, -1, null, false, merger.hasProx(),
-                                               merger.getCodec());
+                                               useCompoundFile, -1, null, false, merger.hasProx(), merger.getCodec());
 
       if (useCompoundFile) {
         List<String> filesToDelete = merger.createCompoundFile(merged + ".cfs", info);
Index: lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 949382)
+++ lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy)
@@ -72,8 +72,8 @@
     merger.closeReaders();
     assertTrue(docsMerged == 2);
     //Should be able to open a new SegmentReader against the new directory
-    SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true,
-                                                                                     -1, null, false, merger.hasProx(), merger.getCodec()), BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, null);
+    SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, -1,
+                                                                                     null, false, merger.hasProx(), merger.getCodec()), BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, null);
     assertTrue(mergedReader != null);
     assertTrue(mergedReader.numDocs() == 2);
Index: lucene/src/test/org/apache/lucene/index/index.19.cfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.19.nocfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.20.cfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.20.nocfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.21.cfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.21.nocfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.22.cfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.22.nocfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.23.cfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.23.nocfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.24.cfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.24.nocfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.29.cfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: lucene/src/test/org/apache/lucene/index/index.29.nocfs.zip
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
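
Note on the upgrade path named in the CHANGES.txt entry above ("upgrade to 3.x first, then run optimize(), or reindex"): the sketch below shows that step using the stock Lucene 3.0.x API. It is a minimal illustration, not part of this patch. Assumptions: a Lucene 3.0.x JAR on the classpath, a hypothetical index path, and an arbitrary analyzer (the analyzer choice does not affect optimize()). If the index is already a single old-format segment, optimize() may be a no-op, in which case reindexing is the safe route.

    import java.io.File;
    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class UpgradePre30Index {
      public static void main(String[] args) throws Exception {
        // Hypothetical path; point this at the real pre-3.0 index.
        Directory dir = FSDirectory.open(new File("/path/to/old/index"));
        // Lucene 3.0.x can still read pre-3.0 segments; open for append (create=false).
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(),
            false, IndexWriter.MaxFieldLength.UNLIMITED);
        // Merging down to one segment rewrites all segments in the 3.x format,
        // which this patch's trunk code still knows how to read.
        writer.optimize();
        writer.close();
        dir.close();
      }
    }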