Index: lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 1432331) +++ lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -58,7 +58,6 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -100,7 +99,7 @@ createIndex("index.nocfs", false, false); } */ - + /* // These are only needed for the special upgrade test to verify // that also single-segment indexes are correctly upgraded by IndexUpgrader. @@ -116,8 +115,40 @@ } */ + + /* + public void testCreateMoreTermsIndex() throws Exception { + // we use a real directory name that is not cleaned up, + // because this method is only used to create backwards + // indexes: + File indexDir = new File("moreterms"); + _TestUtil.rmDir(indexDir); + Directory dir = newFSDirectory(indexDir); + + LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); + mp.setUseCompoundFile(false); + mp.setNoCFSRatio(1.0); + mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY); + // TODO: remove randomness + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) + .setMergePolicy(mp); + conf.setCodec(Codec.forName("Lucene40")); + IndexWriter writer = new IndexWriter(dir, conf); + LineFileDocs docs = new LineFileDocs(null, true); + for(int i=0;i<50;i++) { + writer.addDocument(docs.nextDoc()); + } + writer.close(); + dir.close(); + + // Gives you time to copy the index out!: (there is also + // a test option to not remove temp dir...): + Thread.sleep(100000); + } + */ + final static String[] oldNames = {"40.cfs", - 
"40.nocfs", + "40.nocfs", }; final String[] unsupportedNames = {"19.cfs", @@ -145,7 +176,7 @@ }; final static String[] oldSingleSegmentNames = {"40.optimized.cfs", - "40.optimized.nocfs", + "40.optimized.nocfs", }; static Map oldIndexDirs; @@ -908,4 +939,20 @@ dir.close(); } } + + public static final String moreTermsIndex = "moreterms.40.zip"; + + /** Unzips the pre-built moreterms.40.zip index into a temp dir and runs checkIndex on it. */ + public void testMoreTerms() throws Exception { + File oldIndexDir = _TestUtil.getTempDir("moreterms"); + _TestUtil.unzip(getDataFile(moreTermsIndex), oldIndexDir); + Directory dir = newFSDirectory(oldIndexDir); + try { + // TODO: more tests + _TestUtil.checkIndex(dir); + } finally { + // always release the directory, even if checkIndex throws + dir.close(); + } + } } Index: lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip =================================================================== --- lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip (revision 1432331) +++ lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip (working copy) Property changes on: lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip ___________________________________________________________________ Added: svn:mime-type ## -0,0 +1 ## +application/octet-stream \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/util/fst/FST.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/fst/FST.java (revision 1432331) +++ lucene/core/src/java/org/apache/lucene/util/fst/FST.java (working copy) @@ -27,6 +27,11 @@ import java.io.OutputStream; import java.util.HashMap; import java.util.Map; +/* +import java.io.Writer; +import java.io.OutputStreamWriter; +import java.io.FileOutputStream; +*/ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataInput; @@ -125,8 +130,11 @@ /** 
Added optional packed format. */ private final static int VERSION_PACKED = 3; - private final static int VERSION_CURRENT = VERSION_PACKED; + /** Changed from int to vInt for encoding arc targets. */ + private final static int VERSION_VINT_TARGET = 4; + private final static int VERSION_CURRENT = VERSION_VINT_TARGET; + // Never serialized; just used to represent the virtual // final node w/ no arcs: private final static int FINAL_END_NODE = -1; @@ -263,12 +271,15 @@ // clear early on: private GrowableWriter inCounts; + private final int version; + // make a new empty FST, for building; Builder invokes // this ctor FST(INPUT_TYPE inputType, Outputs outputs, boolean willPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) { this.inputType = inputType; this.outputs = outputs; this.allowArrayArcs = allowArrayArcs; + version = VERSION_CURRENT; bytes = new byte[128]; NO_OUTPUT = outputs.getNoOutput(); if (willPackFST) { @@ -292,7 +303,7 @@ writer = null; - // NOTE: only reads most recent format; we don't have - // back-compat promise for FSTs (they are experimental): - CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_PACKED); + // NOTE: reads VERSION_PACKED through VERSION_VINT_TARGET; FSTs are + // experimental, so there is no wider back-compat promise: + version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_VINT_TARGET); packed = in.readByte() == 1; if (in.readByte() == 1) { // accepts empty string @@ -345,6 +356,15 @@ // building; we need to break out mutable FST from // immutable allowArrayArcs = false; + + /* + if (bytes.length == 665) { + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + Util.toDot(this, w, false, false); + w.close(); + System.out.println("Wrote FST to out.dot"); + } + */ } public INPUT_TYPE getInputType() { @@ -656,7 +676,7 @@ if (targetHasArcs && (flags & BIT_TARGET_NEXT) == 0) { assert target.node > 0; //System.out.println(" write target"); - writer.writeInt(target.node); + writer.writeVInt(target.node); } // just write the arcs "like normal" on first pass, @@ -804,12 +824,10 @@ } if 
(arc.flag(BIT_STOP_NODE)) { } else if (arc.flag(BIT_TARGET_NEXT)) { + } else if (packed) { + in.readVInt(); } else { - if (packed) { - in.readVInt(); - } else { - in.skip(4); - } + readUnpackedNodeTarget(in); } arc.flags = in.readByte(); } @@ -823,6 +841,16 @@ } } + /** Reads an unpacked arc target using the encoding of this FST's version: + * a 4-byte int before VERSION_VINT_TARGET, a vInt from that version on. */ + private int readUnpackedNodeTarget(BytesReader in) throws IOException { + if (version < VERSION_VINT_TARGET) { + return in.readInt(); + } + // VERSION_VINT_TARGET or newer: + return in.readVInt(); + } + /** * Follow the follow arc and read the first arc of its target; * this changes the provided arc (2nd arg) in-place and returns @@ -921,8 +949,10 @@ int pos = in.pos = getNodeAddress(arc.nextArc); final byte b = in.readByte(); if (b == ARCS_AS_FIXED_ARRAY) { - //System.out.println(" nextArc fake array"); + //System.out.println(" nextArc fixed array"); in.readVInt(); + + // Skip bytesPerArc: if (packed) { in.readVInt(); } else { @@ -1024,7 +1054,7 @@ //System.out.println(" abs code=" + code + " derefLen=" + nodeRefToAddress.length); } } else { - arc.target = in.readInt(); + arc.target = readUnpackedNodeTarget(in); } arc.nextArc = in.pos; } @@ -1147,7 +1177,7 @@ if (packed) { in.readVInt(); } else { - in.readInt(); + readUnpackedNodeTarget(in); } } @@ -1452,6 +1482,7 @@ // Creates a packed FST private FST(INPUT_TYPE inputType, PackedInts.Reader nodeRefToAddress, Outputs outputs) { + version = VERSION_CURRENT; packed = true; this.inputType = inputType; bytes = new byte[128]; Index: lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java (revision 1432331) +++ lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java (working copy) @@ -289,6 +289,13 @@ null, willRewrite, true); + if (LuceneTestCase.VERBOSE) { + if (willRewrite) { + System.out.println("TEST: packed FST"); + } else { + System.out.println("TEST: non-packed 
FST"); + } + } for(InputOutput pair : pairs) { if (pair.output instanceof List) {