Index: modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (revision 1222967) +++ modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (working copy) @@ -56,20 +56,11 @@ * @lucene.experimental */ public class DirectSpellChecker { - /** The default StringDistance, Levenshtein distance implemented internally + /** The default StringDistance, Damerau-Levenshtein distance implemented internally * via {@link LevenshteinAutomata}. *

- * Note: this is the fastest distance metric, because Levenshtein is used + * Note: this is the fastest distance metric, because Damerau-Levenshtein is used * to draw candidates from the term dictionary: this just re-uses the scoring. - *

- * Note also that this metric differs in subtle ways from {@link LevensteinDistance}: - *

*/ public static final StringDistance INTERNAL_LEVENSHTEIN = new LuceneLevenshteinDistance(); @@ -277,8 +268,8 @@ * Set the string distance metric. * The default is {@link #INTERNAL_LEVENSHTEIN} *

- * Note: because this spellchecker draws its candidates from the - * term dictionary using Levenshtein, it works best with an edit-distance-like + * Note: because this spellchecker draws its candidates from the term + * dictionary using Damerau-Levenshtein, it works best with an edit-distance-like * string metric. If you use a different metric than the default, * you might want to consider increasing {@link #setMaxInspections(int)} * to draw more candidates for your metric to rank. @@ -401,7 +392,7 @@ if (terms == null) { return Collections.emptyList(); } - FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1)); + FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true); final PriorityQueue stQueue = new PriorityQueue(); BytesRef queryTerm = new BytesRef(term.text()); Index: modules/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java (revision 1222967) +++ modules/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java (working copy) @@ -20,16 +20,22 @@ import org.apache.lucene.util.IntsRef; /** - * Levenshtein implemented in a consistent way as Lucene's FuzzyTermsEnum. + * Damerau-Levenshtein (optimal string alignment) implemented in a way + * consistent with Lucene's FuzzyTermsEnum with the transpositions option enabled. * - * Note also that this metric differs in subtle ways from {@link LevensteinDistance}: + * Notes: *

+ * + * NOTE: this class is not particularly efficient. It is only intended + * for merging results from multiple DirectSpellCheckers. */ public final class LuceneLevenshteinDistance implements StringDistance { @@ -38,28 +44,24 @@ IntsRef targetPoints; IntsRef otherPoints; int n; - int p[]; //'previous' cost array, horizontally - int d[]; // cost array, horizontally - int _d[]; //placeholder to assist in swapping p and d - + int d[][]; // cost array + // cheaper to do this up front once targetPoints = toIntsRef(target); otherPoints = toIntsRef(other); n = targetPoints.length; - p = new int[n+1]; - d = new int[n+1]; + final int m = otherPoints.length; + d = new int[n+1][m+1]; - final int m = otherPoints.length; if (n == 0 || m == 0) { if (n == m) { - return 1; + return 0; } else { - return 0; + return Math.max(n, m); } } - // indexes into strings s and t int i; // iterates through s int j; // iterates through t @@ -68,29 +70,29 @@ int cost; // cost - for (i = 0; i <= n; i++) { - p[i] = i; + for (i = 0; i<=n; i++) { + d[i][0] = i; } + + for (j = 0; j<=m; j++) { + d[0][j] = j; + } - for (j = 1; j <= m; j++) { - t_j = otherPoints.ints[j - 1]; - d[0] = j; + for (j = 1; j<=m; j++) { + t_j = otherPoints.ints[j-1]; - for (i=1; i <= n; i++) { - cost = targetPoints.ints[i - 1] == t_j ? 0 : 1; + for (i=1; i<=n; i++) { + cost = targetPoints.ints[i-1]==t_j ? 
0 : 1; // minimum of cell to the left+1, to the top+1, diagonally left and up +cost - d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost); + d[i][j] = Math.min(Math.min(d[i-1][j]+1, d[i][j-1]+1), d[i-1][j-1]+cost); + // transposition + if (i > 1 && j > 1 && targetPoints.ints[i-1] == otherPoints.ints[j-2] && targetPoints.ints[i-2] == otherPoints.ints[j-1]) { + d[i][j] = Math.min(d[i][j], d[i-2][j-2] + cost); + } } - - // copy current distance counts to 'previous row' distance counts - _d = p; - p = d; - d = _d; } - - // our last action in the above loop was to switch d and p, so p now - // actually has the most recent cost counts - return 1.0f - ((float) p[n] / Math.min(m, n)); + + return 1.0f - ((float) d[n][m] / Math.min(m, n)); } private static IntsRef toIntsRef(String s) { Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java (revision 1222967) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java (working copy) @@ -211,7 +211,7 @@ AttributeSource atts = new AttributeSource(); MaxNonCompetitiveBoostAttribute maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class); - FuzzyTermsEnum fe = new FuzzyTermsEnum(MultiFields.getTerms(reader, startTerm.field()), atts, startTerm, f.minSimilarity, f.prefixLength); + FuzzyTermsEnum fe = new FuzzyTermsEnum(MultiFields.getTerms(reader, startTerm.field()), atts, startTerm, f.minSimilarity, f.prefixLength, false); //store the df so all variants use same idf int df = reader.docFreq(startTerm); int numVariants=0; Index: lucene/common-build.xml =================================================================== --- lucene/common-build.xml (revision 1222967) +++ lucene/common-build.xml (working copy) @@ -194,7 +194,7 @@ - + Index: 
lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java =================================================================== --- lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java (revision 1222967) +++ lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java (working copy) @@ -23,7 +23,6 @@ import org.apache.lucene.util.LuceneTestCase; public class TestLevenshteinAutomata extends LuceneTestCase { - public void testLev0() throws Exception { assertLev("", 0); assertCharVectors(0); @@ -64,31 +63,46 @@ * up to some maximum distance. */ private void assertLev(String s, int maxDistance) { - LevenshteinAutomata builder = new LevenshteinAutomata(s); + LevenshteinAutomata builder = new LevenshteinAutomata(s, false); + LevenshteinAutomata tbuilder = new LevenshteinAutomata(s, true); Automaton automata[] = new Automaton[maxDistance + 1]; + Automaton tautomata[] = new Automaton[maxDistance + 1]; for (int n = 0; n < automata.length; n++) { automata[n] = builder.toAutomaton(n); + tautomata[n] = tbuilder.toAutomaton(n); assertNotNull(automata[n]); + assertNotNull(tautomata[n]); assertTrue(automata[n].isDeterministic()); + assertTrue(tautomata[n].isDeterministic()); assertTrue(SpecialOperations.isFinite(automata[n])); + assertTrue(SpecialOperations.isFinite(tautomata[n])); AutomatonTestUtil.assertNoDetachedStates(automata[n]); + AutomatonTestUtil.assertNoDetachedStates(tautomata[n]); // check that the dfa for n-1 accepts a subset of the dfa for n if (n > 0) { assertTrue(automata[n-1].subsetOf(automata[n])); + assertTrue(automata[n-1].subsetOf(tautomata[n])); + assertTrue(tautomata[n-1].subsetOf(automata[n])); + assertTrue(tautomata[n-1].subsetOf(tautomata[n])); assertNotSame(automata[n-1], automata[n]); } + // check that Lev(N) is a subset of LevT(N) + assertTrue(automata[n].subsetOf(tautomata[n])); // special checks for specific n switch(n) { case 0: // easy, matches the string itself 
assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeString(s), automata[0])); + assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeString(s), tautomata[0])); break; case 1: // generate a lev1 naively, and check the accepted lang is the same. assertTrue(BasicOperations.sameLanguage(naiveLev1(s), automata[1])); + assertTrue(BasicOperations.sameLanguage(naiveLev1T(s), tautomata[1])); break; default: assertBruteForce(s, automata[n], n); + assertBruteForceT(s, tautomata[n], n); break; } } @@ -111,6 +125,17 @@ } /** + * Return an automaton that accepts all 1-character insertions, deletions, + * substitutions, and transpositions of s. + */ + private Automaton naiveLev1T(String s) { + Automaton a = naiveLev1(s); + a = BasicOperations.union(a, transpositionsOf(s)); + MinimizationOperations.minimize(a); + return a; + } + + /** * Return an automaton that accepts all 1-character insertions of s (inserting * one character) */ @@ -170,6 +195,29 @@ return a; } + /** + * Return an automaton that accepts all transpositions of s + * (transposing two adjacent characters) + */ + private Automaton transpositionsOf(String s) { + if (s.length() < 2) + return BasicAutomata.makeEmpty(); + List list = new ArrayList(); + for (int i = 0; i < s.length()-1; i++) { + StringBuilder sb = new StringBuilder(); + sb.append(s.substring(0, i)); + sb.append(s.charAt(i+1)); + sb.append(s.charAt(i)); + sb.append(s.substring(i+2, s.length())); + String st = sb.toString(); + if (!st.equals(s)) + list.add(BasicAutomata.makeString(st)); + } + Automaton a = BasicOperations.union(list); + MinimizationOperations.minimize(a); + return a; + } + private void assertBruteForce(String input, Automaton dfa, int distance) { CharacterRunAutomaton ra = new CharacterRunAutomaton(dfa); int maxLen = input.length() + distance + 1; @@ -185,6 +233,21 @@ } } + private void assertBruteForceT(String input, Automaton dfa, int distance) { + CharacterRunAutomaton ra = new CharacterRunAutomaton(dfa); + int maxLen = 
input.length() + distance + 1; + int maxNum = (int) Math.pow(2, maxLen); + for (int i = 0; i < maxNum; i++) { + String encoded = Integer.toString(i, 2); + boolean accepts = ra.run(encoded); + if (accepts) { + assertTrue(getTDistance(input, encoded) <= distance); + } else { + assertTrue(getTDistance(input, encoded) > distance); + } + } + } + //***************************** // Compute Levenshtein distance: see org.apache.commons.lang.StringUtils#getLevenshteinDistance(String, String) //***************************** @@ -260,4 +323,58 @@ // actually has the most recent cost counts return Math.abs(p[n]); } + + private int getTDistance(String target, String other) { + char[] sa; + int n; + int d[][]; // cost array + + sa = target.toCharArray(); + n = sa.length; + final int m = other.length(); + d = new int[n+1][m+1]; + + if (n == 0 || m == 0) { + if (n == m) { + return 0; + } + else { + return Math.max(n, m); + } + } + + // indexes into strings s and t + int i; // iterates through s + int j; // iterates through t + + char t_j; // jth character of t + + int cost; // cost + + for (i = 0; i<=n; i++) { + d[i][0] = i; + } + + for (j = 0; j<=m; j++) { + d[0][j] = j; + } + + for (j = 1; j<=m; j++) { + t_j = other.charAt(j-1); + + for (i=1; i<=n; i++) { + cost = sa[i-1]==t_j ? 
0 : 1; + // minimum of cell to the left+1, to the top+1, diagonally left and up +cost + d[i][j] = Math.min(Math.min(d[i-1][j]+1, d[i][j-1]+1), d[i-1][j-1]+cost); + // transposition + if (i > 1 && j > 1 && target.charAt(i-1) == other.charAt(j-2) && target.charAt(i-2) == other.charAt(j-1)) { + d[i][j] = Math.min(d[i][j], d[i-2][j-2] + cost); + } + } + } + + // the full cost matrix is kept (no row swapping), so the bottom-right + // cell holds the distance between the complete strings + return Math.abs(d[n][m]); + } } Index: lucene/src/java/org/apache/lucene/search/FuzzyQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (revision 1222967) +++ lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (working copy) @@ -141,7 +141,7 @@ if (!termLongEnough) { // can only match if it's exact return new SingleTermsEnum(terms.iterator(null), term.bytes()); } - return new FuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength); + return new FuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength, false); } /** Index: lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 1222967) +++ lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -80,6 +80,8 @@ private final int termText[]; private final int realPrefixLength; + private final boolean transpositions; + /** * Constructor for enumeration of all terms from specified reader which share a prefix of * length prefixLength with term and which have a fuzzy similarity > @@ -98,7 +100,7 @@ * @throws IOException */ public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, - final float minSimilarity, final int prefixLength) throws IOException { + final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException { if 
(minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity) throw new IllegalArgumentException("fractional edit distances are not allowed"); if (minSimilarity < 0.0f) @@ -130,6 +132,11 @@ maxEdits = initialMaxDistance(this.minSimilarity, termLength); raw = false; } + if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { + throw new UnsupportedOperationException("with transpositions enabled, distances > " + + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported "); + } + this.transpositions = transpositions; this.scale_factor = 1.0f / (1.0f - this.minSimilarity); this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class); @@ -162,7 +169,7 @@ if (runAutomata.size() <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { LevenshteinAutomata builder = - new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength)); + new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions); for (int i = runAutomata.size(); i <= maxDistance; i++) { Automaton a = builder.toAutomaton(i); Index: lucene/src/java/org/apache/lucene/util/automaton/Lev1TParametricDescription.java =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/Lev1TParametricDescription.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/automaton/Lev1TParametricDescription.java (revision 0) @@ -0,0 +1,119 @@ +package org.apache.lucene.util.automaton; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// The following code was generated with the moman/finenight pkg +// This package is available under the MIT License, see NOTICE.txt +// for more details. + +import org.apache.lucene.util.automaton.LevenshteinAutomata.ParametricDescription; + +/** Parametric description for generating a Levenshtein automaton of degree 1, + with transpositions as primitive edits */ +class Lev1TParametricDescription extends ParametricDescription { + + @Override + int transition(int absState, int position, int vector) { + // null absState should never be passed in + assert absState != -1; + + // decode absState -> state, offset + int state = absState/(w+1); + int offset = absState%(w+1); + assert offset >= 0; + + if (position == w) { + if (state < 2) { + final int loc = vector * 2 + state; + offset += unpack(offsetIncrs0, loc, 1); + state = unpack(toStates0, loc, 2)-1; + } + } else if (position == w-1) { + if (state < 3) { + final int loc = vector * 3 + state; + offset += unpack(offsetIncrs1, loc, 1); + state = unpack(toStates1, loc, 2)-1; + } + } else if (position == w-2) { + if (state < 6) { + final int loc = vector * 6 + state; + offset += unpack(offsetIncrs2, loc, 2); + state = unpack(toStates2, loc, 3)-1; + } + } else { + if (state < 6) { + final int loc = vector * 6 + state; + offset += unpack(offsetIncrs3, loc, 2); + state = unpack(toStates3, loc, 3)-1; + } + } + + if (state == -1) { + // null state + return -1; + } else { + // translate back to abs + return state*(w+1)+offset; + } + } + + // 1 vectors; 2 states per vector; array length = 2 + private final 
static long[] toStates0 = new long[] /*2 bits per value */ { + 0x2L + }; + private final static long[] offsetIncrs0 = new long[] /*1 bits per value */ { + 0x0L + }; + + // 2 vectors; 3 states per vector; array length = 6 + private final static long[] toStates1 = new long[] /*2 bits per value */ { + 0xa43L + }; + private final static long[] offsetIncrs1 = new long[] /*1 bits per value */ { + 0x38L + }; + + // 4 vectors; 6 states per vector; array length = 24 + private final static long[] toStates2 = new long[] /*3 bits per value */ { + 0x3453491482140003L,0x6dL + }; + private final static long[] offsetIncrs2 = new long[] /*2 bits per value */ { + 0x555555a20000L + }; + + // 8 vectors; 6 states per vector; array length = 48 + private final static long[] toStates3 = new long[] /*3 bits per value */ { + 0x21520854900c0003L,0x5b4d19a24534916dL,0xda34L + }; + private final static long[] offsetIncrs3 = new long[] /*2 bits per value */ { + 0x5555ae0a20fc0000L,0x55555555L + }; + + // state map + // 0 -> [(0, 0)] + // 1 -> [(0, 1)] + // 2 -> [(0, 1), (1, 1)] + // 3 -> [(0, 1), (2, 1)] + // 4 -> [t(0, 1), (0, 1), (1, 1), (2, 1)] + // 5 -> [(0, 1), (1, 1), (2, 1)] + + + public Lev1TParametricDescription(int w) { + super(w, 1, new int[] {0,1,0,-1,-1,-1}); + } +} Index: lucene/src/java/org/apache/lucene/util/automaton/Lev2TParametricDescription.java =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/Lev2TParametricDescription.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/automaton/Lev2TParametricDescription.java (revision 0) @@ -0,0 +1,264 @@ +package org.apache.lucene.util.automaton; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// The following code was generated with the moman/finenight pkg +// This package is available under the MIT License, see NOTICE.txt +// for more details. + +import org.apache.lucene.util.automaton.LevenshteinAutomata.ParametricDescription; + +/** Parametric description for generating a Levenshtein automaton of degree 2, + with transpositions as primitive edits */ +class Lev2TParametricDescription extends ParametricDescription { + + @Override + int transition(int absState, int position, int vector) { + // null absState should never be passed in + assert absState != -1; + + // decode absState -> state, offset + int state = absState/(w+1); + int offset = absState%(w+1); + assert offset >= 0; + + if (position == w) { + if (state < 3) { + final int loc = vector * 3 + state; + offset += unpack(offsetIncrs0, loc, 1); + state = unpack(toStates0, loc, 2)-1; + } + } else if (position == w-1) { + if (state < 5) { + final int loc = vector * 5 + state; + offset += unpack(offsetIncrs1, loc, 1); + state = unpack(toStates1, loc, 3)-1; + } + } else if (position == w-2) { + if (state < 13) { + final int loc = vector * 13 + state; + offset += unpack(offsetIncrs2, loc, 2); + state = unpack(toStates2, loc, 4)-1; + } + } else if (position == w-3) { + if (state < 28) { + final int loc = vector * 28 + state; + offset += unpack(offsetIncrs3, loc, 2); + state = unpack(toStates3, loc, 5)-1; + } + } else if 
(position == w-4) { + if (state < 45) { + final int loc = vector * 45 + state; + offset += unpack(offsetIncrs4, loc, 3); + state = unpack(toStates4, loc, 6)-1; + } + } else { + if (state < 45) { + final int loc = vector * 45 + state; + offset += unpack(offsetIncrs5, loc, 3); + state = unpack(toStates5, loc, 6)-1; + } + } + + if (state == -1) { + // null state + return -1; + } else { + // translate back to abs + return state*(w+1)+offset; + } + } + + // 1 vectors; 3 states per vector; array length = 3 + private final static long[] toStates0 = new long[] /*2 bits per value */ { + 0x23L + }; + private final static long[] offsetIncrs0 = new long[] /*1 bits per value */ { + 0x0L + }; + + // 2 vectors; 5 states per vector; array length = 10 + private final static long[] toStates1 = new long[] /*3 bits per value */ { + 0x13688b44L + }; + private final static long[] offsetIncrs1 = new long[] /*1 bits per value */ { + 0x3e0L + }; + + // 4 vectors; 13 states per vector; array length = 52 + private final static long[] toStates2 = new long[] /*4 bits per value */ { + 0x60dbb0b05200b504L,0x5233217627062227L,0x2355543214323235L,0x4354L + }; + private final static long[] offsetIncrs2 = new long[] /*2 bits per value */ { + 0x555080a800002000L,0x5555555555L + }; + + // 8 vectors; 28 states per vector; array length = 224 + private final static long[] toStates3 = new long[] /*5 bits per value */ { + 0xe701c02940059404L,0xa010162000a50000L,0xb02c8c40a1416288L,0xa821032310858c0L, + 0x314423980d28b201L,0x5281e528847788e0L,0xa23980d308c2280eL,0x1e3294b1a962278cL, + 0x8c41309e2288e528L,0x11444409021aca21L,0x11a4624886b1086bL,0x2a6258941d6240c4L, + 0x5024a50b489074adL,0x14821aca520c411aL,0x5888b5890b594a44L,0x941d6520c411a465L, + 0x8b589075ad6a62d4L,0x1a5055a4L + }; + private final static long[] offsetIncrs3 = new long[] /*2 bits per value */ { + 0x30c30200002000L,0x2a0030f3c3fc333cL,0x233a00328282a820L,0x5555555532b283a8L, + 0x5555555555555555L,0x5555555555555555L,0x5555555555555555L + }; 
+ + // 16 vectors; 45 states per vector; array length = 720 + private final static long[] toStates4 = new long[] /*6 bits per value */ { + 0x3801450002c5004L,0xc500014b00000e38L,0x51451401402L,0x0L, + 0x518000b14010000L,0x9f1c20828e20230L,0x219f0df0830a70c2L,0x8200008208208200L, + 0x805050160800800L,0x3082098602602643L,0x4564014250508064L,0x850051420000831L, + 0x4140582085002082L,0x456180980990c201L,0x8316d0c50a01051L,0x21451420050df0e0L, + 0xd14214014508214L,0x3c21c01850821c60L,0x1cb1403cb142087L,0x800821451851822cL, + 0x20020820800020L,0xd006182087180345L,0xcb0a81cb24976b09L,0x8b1a60e624709d1L, + 0x249082082249089L,0xc31421c600d2c024L,0x3c31451515454423L,0x31853c22c21cb140L, + 0x4514500b2c208214L,0x8718034508b0051L,0xb2cb45515108f0c5L,0xe824715d1cb0a810L, + 0x1422cb14908b0e60L,0x30812c22c02cb145L,0x842022020cb1420cL,0x5c20ce0820ce0850L, + 0x208208208b0d70c2L,0x4208508214214208L,0x920834050830c20L,0xc6134dc613653592L, + 0xd309341c6dc4db4dL,0x6424d90854d34d34L,0x92072c22030814c2L,0x4220724b24a30930L, + 0x2470d72025c920e2L,0x92c92d70975c9082L,0xcb0880c204924e08L,0x45739728c24c2481L, + 0xc6da4db5da6174daL,0x4b5d35d75d30971dL,0x1030815c93825ce2L,0x51442051020cb145L, + 0xc538210e2c220e2cL,0x851421452cb0d70L,0x204b085085145142L,0x921560834051440cL, + 0x4d660e4da60e6595L,0x94d914e41c6dc658L,0x826426591454d365L,0x2892072c51030813L, + 0xe2c22072cb2ca30bL,0x452c70d720538910L,0x8b2cb2d708e3891L,0x81cb1440c204b24eL, + 0xda44e38e28c2ca24L,0x1dc6da6585d660e4L,0xe2cb5d338e5d914eL,0x38938238L + }; + private final static long[] offsetIncrs4 = new long[] /*3 bits per value */ { + 0x3002000000080000L,0x20c060L,0x8149000004000000L,0x4024924110824824L, + 0xdb6030c360002082L,0x6c36c06c301b0d80L,0xb01861b0000db0dbL,0x1b7036209188e06dL, + 0x800920006d86db7L,0x4920c2402402490L,0x49000208249009L,0x4908128128124804L, + 0x34800104124a44a2L,0xc30930900d24020cL,0x40009a0924c24d24L,0x4984a069201061aL, + 0x494d049271269262L,0x2492492492492492L,0x9249249249249249L,0x4924924924924924L, + 
0x2492492492492492L,0x9249249249249249L,0x4924924924924924L,0x2492492492492492L, + 0x9249249249249249L,0x4924924924924924L,0x2492492492492492L,0x9249249249249249L, + 0x4924924924924924L,0x2492492492492492L,0x9249249249249249L,0x4924924924924924L, + 0x2492492492492492L,0x249249249249L + }; + + // 32 vectors; 45 states per vector; array length = 1440 + private final static long[] toStates5 = new long[] /*6 bits per value */ { + 0x3801450002c5004L,0xc500014b00000e38L,0x51451401402L,0x0L, + 0x514000b14010000L,0x550000038e00e0L,0x264518500600b180L,0x8208208208208208L, + 0x2c50040820820L,0x70820a38808c0146L,0xc37c20c29c30827cL,0x20820820800867L, + 0xb140102002002080L,0x828e202300518000L,0x830a70c209f1c20L,0x51451450853df0dfL, + 0x1614214214508214L,0x6026026430805050L,0x2505080643082098L,0x4200008314564014L, + 0x850020820850051L,0x80990c2014140582L,0x8201920208261809L,0x892051990060941L, + 0x22492492c22cb242L,0x430805050162492cL,0x8041451586026026L,0x37c38020c5b43142L, + 0x4208508514508014L,0x141405850850051L,0x51456180980990c2L,0xe008316d0c50a010L, + 0x2c52cb2c508b21f0L,0x600d2c92c22cb249L,0x873c21c01850821cL,0x2c01cb1403cb1420L, + 0x2080082145185182L,0x4500200208208000L,0x870061420871803L,0x740500f5050821cfL, + 0x934d964618609000L,0x4c24d34d30824d30L,0x1860821c600d642L,0xc2a072c925dac274L, + 0x2c69839891c27472L,0x9242082089242242L,0x8208718034b00900L,0x1cb24976b09d0061L, + 0x60e624709d1cb0a8L,0xd31455d71574ce3eL,0x1c600d3825c25d74L,0x51515454423c3142L, + 0xc22c21cb1403c314L,0xb2c20821431853L,0x34508b005145145L,0x5515108f0c508718L, + 0x8740500f2051454L,0xe2534d920618f090L,0x493826596592c238L,0x4423c31421c600d6L, + 0x72c2a042cb2d1545L,0x422c3983a091c574L,0xb2c514508b2c52L,0xf0c508718034b08bL, + 0xa810b2cb45515108L,0x2260e824715d1cb0L,0xe6592c538e2d74ceL,0x420c308138938238L, + 0x850842022020cb1L,0x70c25c20ce0820ceL,0x4208208208208b0dL,0xc20420850821421L, + 0x21080880832c5083L,0xa50838820838c214L,0xaaaaaaaaa9c39430L,0x1aaa7eaa9fa9faaaL, + 
0x824820d01420c308L,0x7184d37184d94d64L,0x34c24d071b7136d3L,0x990936421534d34dL, + 0x834050830c20530L,0x34dc613653592092L,0xa479c6dc4db4dc61L,0x920a9f924924924aL, + 0x72c220308192a82aL,0x724b24a30930920L,0xd72025c920e2422L,0x92d70975c9082247L, + 0x880c204924e0892cL,0x2c928c24c2481cb0L,0x80a5248889088749L,0x6a861b2aaac74394L, + 0x81b2ca6ab27b278L,0xa3093092072c2203L,0xd76985d36915ce5cL,0x5d74c25c771b6936L, + 0x724e0973892d74d7L,0x4c2481cb0880c205L,0x6174da45739728c2L,0x4aa175c6da4db5daL, + 0x6a869b2786486186L,0xcb14510308186caL,0x220e2c5144205102L,0xcb0d70c538210e2cL, + 0x1451420851421452L,0x51440c204b085085L,0xcb1451081440832cL,0x94316208488b0888L, + 0xfaaa7dfa9f7e79c3L,0x30819ea7ea7df7dL,0x6564855820d01451L,0x9613598393698399L, + 0xd965364539071b71L,0x4e0990996451534L,0x21560834051440c2L,0xd660e4da60e65959L, + 0x9207e979c6dc6584L,0xa82a8207df924820L,0x892072c5103081a6L,0x2c22072cb2ca30b2L, + 0x52c70d720538910eL,0x8b2cb2d708e38914L,0x1cb1440c204b24e0L,0x874b2cb28c2ca248L, + 0x4394816224488b08L,0x9e786aa69b1f7e77L,0x51030819eca6a9e7L,0x8e38a30b2892072cL, + 0x6996175983936913L,0x74ce39764538771bL,0xc204e24e08e38b2dL,0x28c2ca2481cb1440L, + 0x85d660e4da44e38eL,0x698607e975c6da65L,0xa6ca6aa699e7864aL + }; + private final static long[] offsetIncrs5 = new long[] /*3 bits per value */ { + 0x3002000000080000L,0x20c060L,0x100000004000000L,0xdb6db6db50603018L, + 0xa480000200002db6L,0x1249208841241240L,0x4000010000104120L,0x2492c42092092052L, + 0xc30d800096592d9L,0xb01b0c06c36036d8L,0x186c00036c36db0dL,0xad860361b01b6c06L, + 0x360001b75b6dd6ddL,0xc412311c0db6030cL,0xdb0db6e36e06L,0x9188e06db01861bL, + 0x6dd6db71b72b62L,0x4024024900800920L,0x20824900904920c2L,0x1201248040049000L, + 0x5524ad4aa4906120L,0x4092402002480015L,0x9252251248409409L,0x4920100124000820L, + 0x29128924204a04a0L,0x900830d200055549L,0x934930c24c24034L,0x418690002682493L, + 0x9a49861261201a48L,0xc348001355249d4L,0x24c40930940d2402L,0x1a40009a0924e24dL, + 
0x6204984a06920106L,0x92494d5492712692L,0x4924924924924924L,0x2492492492492492L, + 0x9249249249249249L,0x4924924924924924L,0x2492492492492492L,0x9249249249249249L, + 0x4924924924924924L,0x2492492492492492L,0x9249249249249249L,0x4924924924924924L, + 0x2492492492492492L,0x9249249249249249L,0x4924924924924924L,0x2492492492492492L, + 0x9249249249249249L,0x4924924924924924L,0x2492492492492492L,0x9249249249249249L, + 0x4924924924924924L,0x2492492492492492L,0x9249249249249249L,0x4924924924924924L, + 0x2492492492492492L,0x9249249249249249L,0x4924924924924924L,0x2492492492492492L, + 0x9249249249249249L,0x4924924924924924L,0x2492492492492492L,0x9249249249249249L, + 0x4924924924924924L,0x2492492492492492L,0x9249249249249249L,0x24924924L + }; + + // state map + // 0 -> [(0, 0)] + // 1 -> [(0, 2)] + // 2 -> [(0, 1)] + // 3 -> [(0, 1), (1, 1)] + // 4 -> [(0, 2), (1, 2)] + // 5 -> [t(0, 2), (0, 2), (1, 2), (2, 2)] + // 6 -> [(0, 2), (2, 1)] + // 7 -> [(0, 1), (2, 2)] + // 8 -> [(0, 2), (2, 2)] + // 9 -> [(0, 1), (1, 1), (2, 1)] + // 10 -> [(0, 2), (1, 2), (2, 2)] + // 11 -> [(0, 1), (2, 1)] + // 12 -> [t(0, 1), (0, 1), (1, 1), (2, 1)] + // 13 -> [(0, 2), (1, 2), (2, 2), (3, 2)] + // 14 -> [t(0, 2), (0, 2), (1, 2), (2, 2), (3, 2)] + // 15 -> [(0, 2), t(1, 2), (1, 2), (2, 2), (3, 2)] + // 16 -> [(0, 2), (2, 1), (3, 1)] + // 17 -> [(0, 1), t(1, 2), (2, 2), (3, 2)] + // 18 -> [(0, 2), (3, 2)] + // 19 -> [(0, 2), (1, 2), t(1, 2), (2, 2), (3, 2)] + // 20 -> [t(0, 2), (0, 2), (1, 2), (3, 1)] + // 21 -> [(0, 1), (1, 1), (3, 2)] + // 22 -> [(0, 2), (2, 2), (3, 2)] + // 23 -> [(0, 2), (1, 2), (3, 1)] + // 24 -> [(0, 2), (1, 2), (3, 2)] + // 25 -> [(0, 1), (2, 2), (3, 2)] + // 26 -> [(0, 2), (3, 1)] + // 27 -> [(0, 1), (3, 2)] + // 28 -> [(0, 2), (2, 1), (4, 2)] + // 29 -> [(0, 2), t(1, 2), (1, 2), (2, 2), (3, 2), (4, 2)] + // 30 -> [(0, 2), (1, 2), (4, 2)] + // 31 -> [(0, 2), (1, 2), (3, 2), (4, 2)] + // 32 -> [(0, 2), (2, 2), (3, 2), (4, 2)] + // 33 -> [(0, 2), (1, 2), t(2, 2), (2, 2), 
(3, 2), (4, 2)] + // 34 -> [(0, 2), (1, 2), (2, 2), t(2, 2), (3, 2), (4, 2)] + // 35 -> [(0, 2), (3, 2), (4, 2)] + // 36 -> [(0, 2), t(2, 2), (2, 2), (3, 2), (4, 2)] + // 37 -> [t(0, 2), (0, 2), (1, 2), (2, 2), (4, 2)] + // 38 -> [(0, 2), (1, 2), (2, 2), (4, 2)] + // 39 -> [t(0, 2), (0, 2), (1, 2), (2, 2), (3, 2), (4, 2)] + // 40 -> [(0, 2), (1, 2), (2, 2), (3, 2), (4, 2)] + // 41 -> [(0, 2), (4, 2)] + // 42 -> [t(0, 2), (0, 2), (1, 2), (2, 2), t(2, 2), (3, 2), (4, 2)] + // 43 -> [(0, 2), (2, 2), (4, 2)] + // 44 -> [(0, 2), (1, 2), t(1, 2), (2, 2), (3, 2), (4, 2)] + + + public Lev2TParametricDescription(int w) { + super(w, 2, new int[] {0,2,1,0,1,0,-1,0,0,-1,0,-1,-1,-1,-1,-1,-2,-1,-1,-1,-2,-1,-1,-2,-1,-1,-2,-1,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2}); + } +} Index: lucene/src/java/org/apache/lucene/util/automaton/createLevAutomata.py =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/createLevAutomata.py (revision 1222967) +++ lucene/src/java/org/apache/lucene/util/automaton/createLevAutomata.py (working copy) @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Note, this file is known to work with rev 115 of the moman +# Note, this file is known to work with rev 120 of the moman # repository (http://bitbucket.org/jpbarrette/moman/overview) # # See also: http://sites.google.com/site/rrettesite/moman @@ -95,9 +95,9 @@ def main(): - if len(sys.argv) != 2: + if len(sys.argv) != 3: print - print 'Usage: python -u %s N' % sys.argv[0] + print 'Usage: python -u %s N ' % sys.argv[0] print print 'NOTE: the resulting .java file is created in the current working dir!' 
print @@ -105,8 +105,10 @@ n = int(sys.argv[1]) - tables = genTransitions(n) + transpose = (sys.argv[2] == "True") + tables = genTransitions(n, transpose) + stateMap = {} # init null state @@ -142,8 +144,13 @@ w('') w('import org.apache.lucene.util.automaton.LevenshteinAutomata.ParametricDescription;') w('') - w('/** Parametric description for generating a Levenshtein automaton of degree %s */' % n) - className = 'Lev%dParametricDescription' % n + if transpose: + w('/** Parametric description for generating a Levenshtein automaton of degree %s, ' % n) + w(' with transpositions as primitive edits */') + className = 'Lev%dTParametricDescription' % n + else: + w('/** Parametric description for generating a Levenshtein automaton of degree %s */' % n) + className = 'Lev%dParametricDescription' % n w('class %s extends ParametricDescription {' % className) @@ -201,9 +208,6 @@ byAction = {} for s, (toS, offset) in l: state = str(s) - if state == '[]': - # don't waste code on the null state - continue toState = str(toS) if state not in stateMap: @@ -213,7 +217,7 @@ byFromState[stateMap[state]] = (1+stateMap[toState], offset) - fromStateDesc = ', '.join([str(x) for x in eval(s)]) + fromStateDesc = s[1:len(s)-1] toStateDesc = ', '.join([str(x) for x in toS]) tup = (stateMap[toState], toStateDesc, offset) @@ -222,10 +226,10 @@ byAction[tup].append((fromStateDesc, stateMap[state])) if numCasesPerVector is None: - numCasesPerVector = len(l)-1 + numCasesPerVector = len(l) else: # we require this to be uniform... empirically it seems to be! 
- assert numCasesPerVector == len(l)-1 + assert numCasesPerVector == len(l) if MODE == 'array': @@ -320,7 +324,10 @@ minErrors = [] for i in xrange(len(stateMap2)-1): w('// %s -> %s' % (i, stateMap2[i])) - v = eval(stateMap2[i]) + # we replace t-notation as its not relevant here + st = stateMap2[i].replace('t', '') + + v = eval(st) minError = min([-i+e for i, e in v]) c = len(v) sum += c Index: lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java (revision 1222967) +++ lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java (working copy) @@ -43,12 +43,20 @@ final int rangeUpper[]; int numRanges = 0; - ParametricDescription descriptions[]; + ParametricDescription descriptions[]; /** * Create a new LevenshteinAutomata for some input String. */ public LevenshteinAutomata(String input) { + this(input, false); + } + + /** + * Create a new LevenshteinAutomata for some input String. + * Optionally use transpositions as a primitive edit. + */ + public LevenshteinAutomata(String input, boolean withTranspositions) { this.input = input; int length = Character.codePointCount(input, 0, input.length()); word = new int[length]; @@ -88,8 +96,8 @@ descriptions = new ParametricDescription[] { null, /* for n=0, we do not need to go through the trouble */ - new Lev1ParametricDescription(word.length), - new Lev2ParametricDescription(word.length), + withTranspositions ? new Lev1TParametricDescription(word.length) : new Lev1ParametricDescription(word.length), + withTranspositions ? 
new Lev2TParametricDescription(word.length) : new Lev2ParametricDescription(word.length), }; } Index: lucene/src/java/org/apache/lucene/util/automaton/Lev1ParametricDescription.java =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/Lev1ParametricDescription.java (revision 1222967) +++ lucene/src/java/org/apache/lucene/util/automaton/Lev1ParametricDescription.java (working copy) @@ -89,26 +89,26 @@ // 4 vectors; 5 states per vector; array length = 20 private final static long[] toStates2 = new long[] /*3 bits per value */ { - 0x4da292442420003L + 0x69a292450428003L }; private final static long[] offsetIncrs2 = new long[] /*2 bits per value */ { - 0x5555528000L + 0x5555588000L }; // 8 vectors; 5 states per vector; array length = 40 private final static long[] toStates3 = new long[] /*3 bits per value */ { - 0x14d0812112018003L,0xb1a29b46d48a49L + 0x1690a82152018003L,0xb1a2d346448a49L }; private final static long[] offsetIncrs3 = new long[] /*2 bits per value */ { - 0x555555e80a0f0000L,0x5555L + 0x555555b8220f0000L,0x5555L }; // state map // 0 -> [(0, 0)] // 1 -> [(0, 1)] // 2 -> [(0, 1), (1, 1)] - // 3 -> [(0, 1), (1, 1), (2, 1)] - // 4 -> [(0, 1), (2, 1)] + // 3 -> [(0, 1), (2, 1)] + // 4 -> [(0, 1), (1, 1), (2, 1)] public Lev1ParametricDescription(int w) { Index: lucene/src/java/org/apache/lucene/util/automaton/Lev2ParametricDescription.java =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/Lev2ParametricDescription.java (revision 1222967) +++ lucene/src/java/org/apache/lucene/util/automaton/Lev2ParametricDescription.java (working copy) @@ -93,7 +93,7 @@ // 2 vectors; 5 states per vector; array length = 10 private final static long[] toStates1 = new long[] /*3 bits per value */ { - 0x1a68c105L + 0x13688b44L }; private final static long[] offsetIncrs1 = new long[] /*1 bits per value */ { 0x3e0L @@ -101,41 +101,41 @@ // 4 
vectors; 11 states per vector; array length = 44 private final static long[] toStates2 = new long[] /*4 bits per value */ { - 0x6280b80804280405L,0x2323432321608282L,0x523434543213L + 0x26a09a0a0520a504L,0x2323523321a260a2L,0x354235543213L }; private final static long[] offsetIncrs2 = new long[] /*2 bits per value */ { - 0x5555502220000800L,0x555555L + 0x5555520280000800L,0x555555L }; // 8 vectors; 21 states per vector; array length = 168 private final static long[] toStates3 = new long[] /*5 bits per value */ { - 0x40300c0108801005L,0x80202a8208801000L,0x4021006280a0288dL,0x30482184802d8414L, - 0x5990240880010460L,0x191a28118330900L,0x310c413204c1104L,0x8625084811c4710dL, - 0xa92a398e2188231aL,0x104e351c4a508ca4L,0x21208511c8341483L,0xe6290620946a1910L, - 0xd47221423216a4a0L,0x28L + 0x380e014a051404L,0xe28245009451140L,0x8a26880098a6268cL,0x180a288ca0246213L, + 0x494053284a1080e1L,0x510265a89c311940L,0x4218c41188a6509cL,0x6340c4211c4710dL, + 0xa168398471882a12L,0x104c841c683a0425L,0x3294472904351483L,0xe6290620a84a20d0L, + 0x1441a0ea2896a4a0L,0x32L }; private final static long[] offsetIncrs3 = new long[] /*2 bits per value */ { - 0x33300030c2000800L,0x32828088800c3cfL,0x5555550cace32320L,0x5555555555555555L, + 0x33300230c0000800L,0x220ca080a00fc330L,0x555555f832823380L,0x5555555555555555L, 0x5555555555555555L,0x5555L }; // 16 vectors; 30 states per vector; array length = 480 private final static long[] toStates4 = new long[] /*5 bits per value */ { - 0x80300c0108801005L,0x88210802000L,0x44200401400000L,0x7ae3b88621185c07L, - 0x101500042100404L,0x20803140501446cL,0x40100420006c2122L,0x490140511b004054L, - 0x8401f2e3c086411L,0x120861200b100822L,0x641102400081180cL,0x4802c40100001088L, - 0x8c21195607048418L,0x1421014245bc3f2L,0x23450230661200b1L,0x2108664118240803L, - 0x8c1984802c802004L,0xbc3e28c41150d140L,0xc4120102209421dL,0x7884c11c4710d031L, - 0x210842109031bc62L,0xd21484360c431044L,0x9c265293a3a6e741L,0x1cc710c41109ce70L, - 
0x1bce27a846525495L,0x3105425094a108c7L,0x6f735e95254731c4L,0x9ee7a9c234a9393aL, - 0x144720d0520c4150L,0x211051bc646084c2L,0x3614831048220842L,0x93a460e742351488L, - 0xc4120a2e70a24656L,0x284642d4941cc520L,0x4094a210c51bce46L,0xb525073148310502L, - 0x24356939460f7358L,0x4098e7aaL + 0x380e014a051404L,0xaa015452940L,0x55014501000000L,0x1843ddc771085c07L, + 0x7141200040108405L,0x52b44004c5313460L,0x401080200063115cL,0x85314c4d181c5048L, + 0x1440190a3e5c7828L,0x28a232809100a21L,0xa028ca2a84203846L,0xca0240010800108aL, + 0xc7b4205c1580a508L,0x1021090251846b6L,0x4cb513862328090L,0x210863128ca2b8a2L, + 0x4e188ca024402940L,0xa6b6c7c520532d4L,0x8c41101451150219L,0xa0c4211c4710d421L, + 0x2108421094e15063L,0x8f13c43708631044L,0x18274d908c611631L,0x1cc238c411098263L, + 0x450e3a1d0212d0b4L,0x31050242048108c6L,0xfa318b42d07308eL,0xa8865182356907c6L, + 0x1ca410d4520c4140L,0x2954e13883a0ca51L,0x3714831044229442L,0x93946116b58f2c84L, + 0xc41109a5631a574dL,0x1d4512d4941cc520L,0x52848294c643883aL,0xb525073148310502L, + 0xa5356939460f7358L,0x409ca651L }; private final static long[] offsetIncrs4 = new long[] /*3 bits per value */ { - 0xc0602000010000L,0xa000040000000001L,0x248204041248L,0xb0180c06c3618618L, - 0x238d861860001861L,0x41040061c6e06041L,0x4004900c2402400L,0x409489001041001L, - 0x4184184004148124L,0x1041b4980c24c3L,0xd26040938d061061L,0x2492492492494146L, + 0x20c0600000010000L,0x2000040000000001L,0x209204a40209L,0x301b6c0618018618L, + 0x207206186000186cL,0x1200061b8e06dc0L,0x480492080612010L,0xa20204a040048000L, + 0x1061a0000129124L,0x1848349b680612L,0xd26da0204a041868L,0x2492492492496128L, 0x9249249249249249L,0x4924924924924924L,0x2492492492492492L,0x9249249249249249L, 0x4924924924924924L,0x2492492492492492L,0x9249249249249249L,0x4924924924924924L, 0x2492492492492492L,0x9249249249249249L,0x24924924L @@ -143,33 +143,33 @@ // 32 vectors; 30 states per vector; array length = 960 private final static long[] toStates5 = new long[] /*5 bits per value */ { - 
0x80300c0108801005L,0x88210802000L,0x42200401400000L,0xa088201000300c03L, - 0x100510842108428L,0x2188461701c01108L,0x108401011eb8eeL,0x85c0700442004014L, - 0x88267ae3b886211L,0x1446c01015108842L,0xc212202080314050L,0x405440100420006L, - 0x10201c50140511b0L,0x942528423b08888L,0x240501446c010155L,0x21007cb8f0219045L, - 0x511b004054402088L,0x2e3c086411490140L,0x200b50904428823fL,0x400081180c120861L, - 0x100001088641102L,0x46030482184802c4L,0x9ce8990840980030L,0x21061200b709c210L, - 0xf0fca308465581c1L,0x802c405084050916L,0xc211956070484184L,0x9e4209ee65bc3f28L, - 0x3450230661200b70L,0x1086641182408032L,0xc1984802c8020042L,0x86098201c8d1408L, - 0xb88a22529ce399L,0x1045434502306612L,0x4088250876f0f8a3L,0xd1408c1984802c80L, - 0xee3dbc3e28c41150L,0xd0310c4188984429L,0xbc627884c11c4710L,0x1044210842109031L, - 0x21704711c4340c43L,0xbdef7bdf0c7a18b4L,0x85210d8310c41ef7L,0x994a4e8e9b9d074L, - 0x60c4310442739c27L,0x3a3a6e741d214843L,0x41ef77bdf77de529L,0x8465254951cc710cL, - 0x94a108c71bce27aL,0x5254731c43105425L,0xdb1c7a38b4a15949L,0xc710c41cf73dce7bL, - 0xe4e9bdcd7a54951cL,0x5427b9ea708d2a4L,0x735e95254731c431L,0xbd677db4a9393a6fL, - 0x4720d0520c41cf75L,0x1051bc646084c214L,0x1483104822084221L,0x193821708511c834L, - 0x1bf6fdef6f7f147aL,0xd08d45220d8520c4L,0x9c289195a4e91839L,0x488361483104828bL, - 0xe5693a460e742351L,0x520c41bf71bdf717L,0xe46284642d4941ccL,0x5024094a210c51bcL, - 0x590b525073148310L,0xce6f7b147a3938a1L,0x941cc520c41f77ddL,0xd5a4e5183dcd62d4L, - 0x48310502639ea890L,0x460f7358b5250731L,0xf779bd6717b56939L + 0x380e014a051404L,0xaa015452940L,0x8052814501000000L,0xb80a515450000e03L, + 0x5140410842108426L,0x71dc421701c01540L,0x100421014610f7L,0x85c0700550145010L, + 0x94a271843ddc7710L,0x1346071412108a22L,0x3115c52b44004c53L,0xc504840108020006L, + 0x54d1001314c4d181L,0x9081204239c4a71L,0x14c5313460714124L,0x51006428f971e0a2L, + 0x4d181c5048402884L,0xa3e5c782885314cL,0x2809409482a8a239L,0x2a84203846028a23L, + 
0x10800108aa028caL,0xe1180a288ca0240L,0x98c6b80e3294a108L,0x2942328091098c10L, + 0x11adb1ed08170560L,0xa024004084240946L,0x7b4205c1580a508cL,0xa8c2968c71846b6cL, + 0x4cb5138623280910L,0x10863128ca2b8a20L,0xe188ca0244029402L,0x4e3294e288132d44L, + 0x809409ad1218c39cL,0xf14814cb51386232L,0x514454086429adb1L,0x32d44e188ca02440L, + 0x8c390a6b6c7c5205L,0xd4218c41409cd2aaL,0x5063a0c4211c4710L,0x10442108421094e1L, + 0x31084711c4350863L,0xbdef7bddf05918f2L,0xc4f10dc218c41ef7L,0x9d3642318458c63L, + 0x70863104426098c6L,0x8c6116318f13c43L,0x41ef75dd6b5de4d9L,0xd0212d0b41cc238cL, + 0x2048108c6450e3a1L,0x42d07308e3105024L,0xdb591938f274084bL,0xc238c41f77deefbbL, + 0x1f183e8c62d0b41cL,0x502a2194608d5a4L,0xa318b42d07308e31L,0xed675db56907c60fL, + 0xa410d4520c41f773L,0x54e13883a0ca511cL,0x1483104422944229L,0x20f2329447290435L, + 0x1ef6f7ef6f7df05cL,0xad63cb210dc520c4L,0x58c695d364e51845L,0xc843714831044269L, + 0xe4d93946116b58f2L,0x520c41ef717d6b17L,0x83a1d4512d4941ccL,0x50252848294c6438L, + 0x144b525073148310L,0xefaf7b591c20f275L,0x941cc520c41f777bL,0xd5a4e5183dcd62d4L, + 0x4831050272994694L,0x460f7358b5250731L,0xf779bd6717b56939L }; private final static long[] offsetIncrs5 = new long[] /*3 bits per value */ { - 0xc0602000010000L,0x8000040000000001L,0xb6db6d4030180L,0x810104922800010L, - 0x248a000040000092L,0x618000b649654041L,0x861b0180c06c3618L,0x301b0d861860001L, - 0x61861800075d6ed6L,0x1871b8181048e3L,0xe56041238d861860L,0x40240041040075c6L, - 0x4100104004900c2L,0x55b5240309009001L,0x1025224004104005L,0x10410010520490L, - 0x55495240409489L,0x4980c24c34184184L,0x30d061061001041bL,0x184005556d260309L, - 0x51b4981024e34184L,0x40938d0610610010L,0x492492495546d260L,0x2492492492492492L, + 0x20c0600000010000L,0x40000000001L,0xb6db6d4830180L,0x4812900824800010L, + 0x2092000040000082L,0x618000b659254a40L,0x86c301b6c0618018L,0xdb01860061860001L, + 0x81861800075baed6L,0x186e381b70081cL,0xe56dc02072061860L,0x61201001200075b8L, + 
0x480000480492080L,0x52b5248201848040L,0x880812810012000bL,0x4004800004a4492L, + 0xb529124a20204aL,0x49b68061201061a0L,0x8480418680018483L,0x1a000752ad26da01L, + 0x4a349b6808128106L,0xa0204a0418680018L,0x492492497528d26dL,0x2492492492492492L, 0x9249249249249249L,0x4924924924924924L,0x2492492492492492L,0x9249249249249249L, 0x4924924924924924L,0x2492492492492492L,0x9249249249249249L,0x4924924924924924L, 0x2492492492492492L,0x9249249249249249L,0x4924924924924924L,0x2492492492492492L, @@ -182,36 +182,36 @@ // 0 -> [(0, 0)] // 1 -> [(0, 2)] // 2 -> [(0, 1)] - // 3 -> [(0, 2), (1, 2)] - // 4 -> [(0, 1), (1, 1)] + // 3 -> [(0, 1), (1, 1)] + // 4 -> [(0, 2), (1, 2)] // 5 -> [(0, 2), (2, 1)] // 6 -> [(0, 1), (2, 2)] - // 7 -> [(0, 2), (1, 2), (2, 2)] - // 8 -> [(0, 1), (2, 1)] - // 9 -> [(0, 2), (2, 2)] - // 10 -> [(0, 1), (1, 1), (2, 1)] - // 11 -> [(0, 2), (1, 2), (2, 2), (3, 2)] - // 12 -> [(0, 2), (2, 1), (3, 1)] - // 13 -> [(0, 2), (3, 2)] - // 14 -> [(0, 2), (2, 2), (3, 2)] - // 15 -> [(0, 2), (1, 2), (3, 1)] - // 16 -> [(0, 2), (1, 2), (3, 2)] - // 17 -> [(0, 1), (2, 2), (3, 2)] - // 18 -> [(0, 2), (3, 1)] - // 19 -> [(0, 1), (3, 2)] - // 20 -> [(0, 1), (1, 1), (3, 2)] + // 7 -> [(0, 2), (2, 2)] + // 8 -> [(0, 1), (1, 1), (2, 1)] + // 9 -> [(0, 2), (1, 2), (2, 2)] + // 10 -> [(0, 1), (2, 1)] + // 11 -> [(0, 2), (3, 2)] + // 12 -> [(0, 2), (1, 2), (3, 2)] + // 13 -> [(0, 2), (1, 2), (2, 2), (3, 2)] + // 14 -> [(0, 1), (2, 2), (3, 2)] + // 15 -> [(0, 2), (3, 1)] + // 16 -> [(0, 1), (3, 2)] + // 17 -> [(0, 1), (1, 1), (3, 2)] + // 18 -> [(0, 2), (1, 2), (3, 1)] + // 19 -> [(0, 2), (2, 2), (3, 2)] + // 20 -> [(0, 2), (2, 1), (3, 1)] // 21 -> [(0, 2), (2, 1), (4, 2)] // 22 -> [(0, 2), (1, 2), (4, 2)] // 23 -> [(0, 2), (1, 2), (3, 2), (4, 2)] - // 24 -> [(0, 2), (2, 2), (4, 2)] - // 25 -> [(0, 2), (2, 2), (3, 2), (4, 2)] - // 26 -> [(0, 2), (3, 2), (4, 2)] + // 24 -> [(0, 2), (2, 2), (3, 2), (4, 2)] + // 25 -> [(0, 2), (3, 2), (4, 2)] + // 26 -> [(0, 2), (1, 2), (2, 2), 
(4, 2)] // 27 -> [(0, 2), (1, 2), (2, 2), (3, 2), (4, 2)] // 28 -> [(0, 2), (4, 2)] - // 29 -> [(0, 2), (1, 2), (2, 2), (4, 2)] + // 29 -> [(0, 2), (2, 2), (4, 2)] public Lev2ParametricDescription(int w) { - super(w, 2, new int[] {0,2,1,1,0,-1,0,0,-1,0,-1,-1,-2,-1,-1,-2,-1,-1,-2,-1,-1,-2,-2,-2,-2,-2,-2,-2,-2,-2}); + super(w, 2, new int[] {0,2,1,0,1,-1,0,0,-1,0,-1,-1,-1,-1,-1,-2,-1,-1,-2,-1,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2}); } } Index: lucene/build.xml =================================================================== --- lucene/build.xml (revision 1222967) +++ lucene/build.xml (working copy) @@ -478,10 +478,14 @@ - - + + + +