Index: lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (revision 1103663) +++ lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (working copy) @@ -30,6 +30,7 @@ package org.apache.lucene.util.automaton; import java.util.BitSet; +import java.util.ArrayList; import java.util.LinkedList; /** @@ -72,8 +73,12 @@ final int[] sigma = a.getStartPoints(); final State[] states = a.getNumberedStates(); final int sigmaLen = sigma.length, statesLen = states.length; - final BitSet[][] reverse = new BitSet[statesLen][sigmaLen]; - final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen]; + @SuppressWarnings("unchecked") final ArrayList[][] reverse = + (ArrayList[][]) new ArrayList[statesLen][sigmaLen]; + @SuppressWarnings("unchecked") final LinkedList[] partition = + (LinkedList[]) new LinkedList[statesLen]; + @SuppressWarnings("unchecked") final ArrayList[] splitblock = + (ArrayList[]) new ArrayList[statesLen]; final int[] block = new int[statesLen]; final StateList[][] active = new StateList[statesLen][sigmaLen]; final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen]; @@ -82,8 +87,8 @@ final BitSet split = new BitSet(statesLen), refine = new BitSet(statesLen), refine2 = new BitSet(statesLen); for (int q = 0; q < statesLen; q++) { - splitblock[q] = new BitSet(statesLen); - partition[q] = new BitSet(statesLen); + splitblock[q] = new ArrayList(); + partition[q] = new LinkedList(); for (int x = 0; x < sigmaLen; x++) { active[q][x] = new StateList(); } @@ -92,23 +97,22 @@ for (int q = 0; q < statesLen; q++) { final State qq = states[q]; final int j = qq.accept ? 0 : 1; - partition[j].set(q); + partition[j].add(qq); block[q] = j; for (int x = 0; x < sigmaLen; x++) { - final BitSet[] r = + final ArrayList[] r = reverse[qq.step(sigma[x]).number]; if (r[x] == null) - r[x] = new BitSet(); - r[x].set(q); + r[x] = new ArrayList(); + r[x].add(qq); } } // initialize active sets for (int j = 0; j <= 1; j++) { - final BitSet part = partition[j]; for (int x = 0; x < sigmaLen; x++) { - for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) { - if (reverse[i][x] != null) - active2[i][x] = active[j][x].add(states[i]); + for (final State qq : partition[j]) { + if (reverse[qq.number][x] != null) + active2[qq.number][x] = active[j][x].add(qq); } } } @@ -127,12 +131,12 @@ pending2.clear(x*statesLen + p); // find states that need to be split off their blocks for (StateListNode m = active[p][x].first; m != null; m = m.next) { - final BitSet r = reverse[m.q.number][x]; - if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) { - if (!split.get(i)) { - split.set(i); - final int j = block[i]; - splitblock[j].set(i); + final ArrayList r = reverse[m.q.number][x]; + if (r != null) for (final State s : r) { + if (!split.get(s.number)) { + split.set(s.number); + final int j = block[s.number]; + splitblock[j].add(s); if (!refine2.get(j)) { refine2.set(j); refine.set(j); @@ -142,18 +146,19 @@ } // refine blocks for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) { - final BitSet sb = splitblock[j]; - if (sb.cardinality() < partition[j].cardinality()) { - final BitSet b1 = partition[j], b2 = partition[k]; - for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) { - b1.clear(i); - b2.set(i); - block[i] = k; + final ArrayList sb = splitblock[j]; + if (sb.size() < partition[j].size()) { + final LinkedList b1 = partition[j]; + final LinkedList b2 = partition[k]; + for (final State s : sb) { + b1.remove(s); + b2.add(s); + block[s.number] = k; for (int c = 0; c < sigmaLen; c++) { - final StateListNode sn = active2[i][c]; + final StateListNode sn = active2[s.number][c]; if (sn != null && sn.sl == active[j][c]) { sn.remove(); - active2[i][c] = active[k][c].add(states[i]); + active2[s.number][c] = active[k][c].add(s); } } } @@ -173,8 +178,8 @@ k++; } refine2.clear(j); - for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) - split.clear(i); + for (State s : sb) + split.clear(s.number); sb.clear(); } refine.clear(); @@ -184,9 +189,7 @@ for (int n = 0; n < newstates.length; n++) { final State s = new State(); newstates[n] = s; - BitSet part = partition[n]; - for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) { - final State q = states[i]; + for (State q : partition[n]) { if (q == a.initial) a.initial = s; s.accept = q.accept; s.number = q.number; // select representative Index: lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java =================================================================== --- lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java (revision 1103663) +++ lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java (working copy) @@ -49,4 +49,9 @@ assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions()); } } + + /** n^2 space usage in Hopcroft minimization? */ + public void testMinimizeHuge() { + new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton(); + } }