Index: lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java =================================================================== --- lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (revision 1386922) +++ lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (working copy) @@ -312,31 +312,37 @@ new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(); } - // Up to two positive ints, shared, generally but not + // ListOfOutputs(PositiveIntOutputs), generally but not // monotonically increasing { if (VERBOSE) { - System.out.println("TEST: now test UpToTwoPositiveIntOutputs"); + System.out.println("TEST: now test OneOrMoreOutputs"); } - final UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true); + final PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton(); + final ListOfOutputs outputs = new ListOfOutputs(_outputs); final List> pairs = new ArrayList>(terms.length); long lastOutput = 0; for(int idx=0;idx values = new ArrayList(); + for(int i=0;i(terms[idx], output)); } new FSTTester(random(), dir, inputMode, pairs, outputs, false).doTest(); @@ -383,7 +389,7 @@ // no pruning doTest(0, 0, true); - if (!(outputs instanceof UpToTwoPositiveIntOutputs)) { + if (!(outputs instanceof ListOfOutputs)) { // simple pruning doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0, true); @@ -488,12 +494,14 @@ willRewrite); for(InputOutput pair : pairs) { - if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) { - final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs; - final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output; + if (pair.output instanceof List) { + assertTrue(outputs instanceof ListOfOutputs); + + @SuppressWarnings("unchecked") List longValues = (List) pair.output; @SuppressWarnings("unchecked") final Builder builderObject = (Builder) builder; - builderObject.add(pair.input, _outputs.get(twoLongs.first)); - 
builderObject.add(pair.input, _outputs.get(twoLongs.second)); + for(Long value : longValues) { + builderObject.add(pair.input, value); + } } else { builder.add(pair.input, pair.output); } @@ -2289,4 +2297,75 @@ } } } + + public void testListOfOutputs() throws Exception { + PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton(); + ListOfOutputs outputs = new ListOfOutputs(_outputs); + final Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); + + final IntsRef scratch = new IntsRef(); + // Add the same input more than once and the outputs + // are merged: + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L); + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L); + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L); + builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 17L); + final FST fst = builder.finish(); + + Object output = Util.get(fst, new BytesRef("a")); + assertNotNull(output); + List outputList = outputs.asList(output); + assertEquals(3, outputList.size()); + assertEquals(1L, outputList.get(0).longValue()); + assertEquals(3L, outputList.get(1).longValue()); + assertEquals(0L, outputList.get(2).longValue()); + + output = Util.get(fst, new BytesRef("b")); + assertNotNull(output); + outputList = outputs.asList(output); + assertEquals(1, outputList.size()); + assertEquals(17L, outputList.get(0).longValue()); + } + + public void testListOfOutputsEmptyString() throws Exception { + PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton(); + ListOfOutputs outputs = new ListOfOutputs(_outputs); + final Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); + + final IntsRef scratch = new IntsRef(); + builder.add(scratch, 0L); + builder.add(scratch, 1L); + builder.add(scratch, 17L); + builder.add(scratch, 1L); + + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L); + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L); + builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L); + 
builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 0L); + + final FST fst = builder.finish(); + + Object output = Util.get(fst, new BytesRef("")); + assertNotNull(output); + List outputList = outputs.asList(output); + assertEquals(4, outputList.size()); + assertEquals(0L, outputList.get(0).longValue()); + assertEquals(1L, outputList.get(1).longValue()); + assertEquals(17L, outputList.get(2).longValue()); + assertEquals(1L, outputList.get(3).longValue()); + + output = Util.get(fst, new BytesRef("a")); + assertNotNull(output); + outputList = outputs.asList(output); + assertEquals(3, outputList.size()); + assertEquals(1L, outputList.get(0).longValue()); + assertEquals(3L, outputList.get(1).longValue()); + assertEquals(0L, outputList.get(2).longValue()); + + output = Util.get(fst, new BytesRef("b")); + assertNotNull(output); + outputList = outputs.asList(output); + assertEquals(1, outputList.size()); + assertEquals(0L, outputList.get(0).longValue()); + } } Index: lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java (revision 1386922) +++ lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java (working copy) @@ -1,226 +0,0 @@ -package org.apache.lucene.util.fst; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.DataOutput; - -/** - * An FST {@link Outputs} implementation where each output - * is one or two non-negative long values. If it's a - * single output, Long is returned; else, TwoLongs. Order - * is preserved in the TwoLongs case, ie .first is the first - * input/output added to Builder, and .second is the - * second. You cannot store 0 output with this (that's - * reserved to mean "no output")! - * - * NOTE: the resulting FST is not guaranteed to be minimal! - * See {@link Builder}. - * - * @lucene.experimental - */ - -public final class UpToTwoPositiveIntOutputs extends Outputs { - - /** Holds two long outputs. 
*/ - public final static class TwoLongs { - public final long first; - public final long second; - - public TwoLongs(long first, long second) { - this.first = first; - this.second = second; - assert first >= 0; - assert second >= 0; - } - - @Override - public String toString() { - return "TwoLongs:" + first + "," + second; - } - - @Override - public boolean equals(Object _other) { - if (_other instanceof TwoLongs) { - final TwoLongs other = (TwoLongs) _other; - return first == other.first && second == other.second; - } else { - return false; - } - } - - @Override - public int hashCode() { - return (int) ((first^(first>>>32)) ^ (second^(second>>32))); - } - } - - private final static Long NO_OUTPUT = new Long(0); - - private final boolean doShare; - - private final static UpToTwoPositiveIntOutputs singletonShare = new UpToTwoPositiveIntOutputs(true); - private final static UpToTwoPositiveIntOutputs singletonNoShare = new UpToTwoPositiveIntOutputs(false); - - private UpToTwoPositiveIntOutputs(boolean doShare) { - this.doShare = doShare; - } - - public static UpToTwoPositiveIntOutputs getSingleton(boolean doShare) { - return doShare ? 
singletonShare : singletonNoShare; - } - - public Long get(long v) { - if (v == 0) { - return NO_OUTPUT; - } else { - return Long.valueOf(v); - } - } - - public TwoLongs get(long first, long second) { - return new TwoLongs(first, second); - } - - @Override - public Long common(Object _output1, Object _output2) { - assert valid(_output1, false); - assert valid(_output2, false); - final Long output1 = (Long) _output1; - final Long output2 = (Long) _output2; - if (output1 == NO_OUTPUT || output2 == NO_OUTPUT) { - return NO_OUTPUT; - } else if (doShare) { - assert output1 > 0; - assert output2 > 0; - return Math.min(output1, output2); - } else if (output1.equals(output2)) { - return output1; - } else { - return NO_OUTPUT; - } - } - - @Override - public Long subtract(Object _output, Object _inc) { - assert valid(_output, false); - assert valid(_inc, false); - final Long output = (Long) _output; - final Long inc = (Long) _inc; - assert output >= inc; - - if (inc == NO_OUTPUT) { - return output; - } else if (output.equals(inc)) { - return NO_OUTPUT; - } else { - return output - inc; - } - } - - @Override - public Object add(Object _prefix, Object _output) { - assert valid(_prefix, false); - assert valid(_output, true); - final Long prefix = (Long) _prefix; - if (_output instanceof Long) { - final Long output = (Long) _output; - if (prefix == NO_OUTPUT) { - return output; - } else if (output == NO_OUTPUT) { - return prefix; - } else { - return prefix + output; - } - } else { - final TwoLongs output = (TwoLongs) _output; - final long v = prefix; - return new TwoLongs(output.first + v, output.second + v); - } - } - - @Override - public void write(Object _output, DataOutput out) throws IOException { - assert valid(_output, true); - if (_output instanceof Long) { - final Long output = (Long) _output; - out.writeVLong(output<<1); - } else { - final TwoLongs output = (TwoLongs) _output; - out.writeVLong((output.first<<1) | 1); - out.writeVLong(output.second); - } - } - - 
@Override - public Object read(DataInput in) throws IOException { - final long code = in.readVLong(); - if ((code & 1) == 0) { - // single long - final long v = code >>> 1; - if (v == 0) { - return NO_OUTPUT; - } else { - return Long.valueOf(v); - } - } else { - // two longs - final long first = code >>> 1; - final long second = in.readVLong(); - return new TwoLongs(first, second); - } - } - - private boolean valid(Long o) { - assert o != null; - assert o instanceof Long; - assert o == NO_OUTPUT || o > 0; - return true; - } - - // Used only by assert - private boolean valid(Object _o, boolean allowDouble) { - if (!allowDouble) { - assert _o instanceof Long; - return valid((Long) _o); - } else if (_o instanceof TwoLongs) { - return true; - } else { - return valid((Long) _o); - } - } - - @Override - public Object getNoOutput() { - return NO_OUTPUT; - } - - @Override - public String outputToString(Object output) { - return output.toString(); - } - - @Override - public Object merge(Object first, Object second) { - assert valid(first, false); - assert valid(second, false); - return new TwoLongs((Long) first, (Long) second); - } -} Index: lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java (revision 1386922) +++ lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java (working copy) @@ -49,10 +49,27 @@ /** Eg add("foo", "bar") -> "foobar" */ public abstract T add(T prefix, T output); + /** Encode an output value into a {@link DataOutput}. */ public abstract void write(T output, DataOutput out) throws IOException; + /** Encode a final node output value into a {@link + * DataOutput}. By default this just calls {@link #write(Object, + * DataOutput)}. 
*/ + public void writeFinalOutput(T output, DataOutput out) throws IOException { + write(output, out); + } + + /** Decode an output value previously written with {@link + * #write(Object, DataOutput)}. */ public abstract T read(DataInput in) throws IOException; + /** Decode an output value previously written with {@link + * #writeFinalOutput(Object, DataOutput)}. By default this + * just calls {@link #read(DataInput)}. */ + public T readFinalOutput(DataInput in) throws IOException { + return read(in); + } + /** NOTE: this output is compared with == so you must * ensure that all methods return the single object if * it's really no output */ Index: lucene/core/src/java/org/apache/lucene/util/fst/Builder.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (revision 1386922) +++ lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (working copy) @@ -399,8 +399,10 @@ } final UnCompiledNode lastNode = frontier[input.length]; - lastNode.isFinal = true; - lastNode.output = NO_OUTPUT; + if (lastInput.length != input.length || prefixLenPlus1 != input.length + 1) { + lastNode.isFinal = true; + lastNode.output = NO_OUTPUT; + } // push conflicting outputs forward, only as far as // needed Index: lucene/core/src/java/org/apache/lucene/util/fst/FST.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/fst/FST.java (revision 1386922) +++ lucene/core/src/java/org/apache/lucene/util/fst/FST.java (working copy) @@ -296,11 +296,13 @@ // messy bytes = new byte[numBytes]; in.readBytes(bytes, 0, numBytes); + BytesReader reader; if (packed) { - emptyOutput = outputs.read(getBytesReader(0)); + reader = getBytesReader(0); } else { - emptyOutput = outputs.read(getBytesReader(numBytes-1)); + reader = getBytesReader(numBytes-1); } + emptyOutput = outputs.readFinalOutput(reader); } else { emptyOutput = null; } @@ -414,7 +416,7 @@ // 
TODO: this is messy -- replace with sillyBytesWriter; maybe make // bytes private final int posSave = writer.posWrite; - outputs.write(emptyOutput, writer); + outputs.writeFinalOutput(emptyOutput, writer); emptyOutputBytes = new byte[writer.posWrite-posSave]; if (!packed) { @@ -638,7 +640,7 @@ if (arc.nextFinalOutput != NO_OUTPUT) { //System.out.println(" write final output"); - outputs.write(arc.nextFinalOutput, writer); + outputs.writeFinalOutput(arc.nextFinalOutput, writer); } if (targetHasArcs && (flags & BIT_TARGET_NEXT) == 0) { @@ -788,7 +790,7 @@ outputs.read(in); } if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) { - outputs.read(in); + outputs.readFinalOutput(in); } if (arc.flag(BIT_STOP_NODE)) { } else if (arc.flag(BIT_TARGET_NEXT)) { @@ -963,7 +965,7 @@ } if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) { - arc.nextFinalOutput = outputs.read(in); + arc.nextFinalOutput = outputs.readFinalOutput(in); } else { arc.nextFinalOutput = outputs.getNoOutput(); } @@ -1127,7 +1129,7 @@ } if (flag(flags, BIT_ARC_HAS_FINAL_OUTPUT)) { - outputs.read(in); + outputs.readFinalOutput(in); } if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) { @@ -1221,6 +1223,14 @@ } } + /** Returns a {@link BytesReader} for this FST, positioned at + * position 0. */ + public BytesReader getBytesReader() { + return getBytesReader(0); + } + + /** Returns a {@link BytesReader} for this FST, positioned at + * the provided position. */ public BytesReader getBytesReader(int pos) { // TODO: maybe re-use via ThreadLocal? 
if (packed) { @@ -1654,7 +1664,7 @@ } } if (arc.nextFinalOutput != NO_OUTPUT) { - outputs.write(arc.nextFinalOutput, writer); + outputs.writeFinalOutput(arc.nextFinalOutput, writer); } if (doWriteTarget) { Index: lucene/core/src/java/org/apache/lucene/util/fst/ListOfOutputs.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/fst/ListOfOutputs.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/util/fst/ListOfOutputs.java (working copy) @@ -0,0 +1,188 @@ +package org.apache.lucene.util.fst; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; + +/** + * Wraps another Outputs implementation and encodes one or + * more of its output values. You can use this when a single + * input may need to map to more than one output, + * maintaining order: pass the same input with a different + * output by calling {@link Builder#add(IntsRef,Object)} multiple + * times. The builder will then combine the outputs using + * the {@link Outputs#merge(Object,Object)} method. + * + *

The resulting FST may not be minimal when an input has + * more than one output, as this requires pushing all + * multi-output values to a final state. + * + *

NOTE: this cannot wrap itself (i.e. you cannot make an + * FST with List&lt;List&lt;Object&gt;&gt; outputs using this). + * + * @lucene.experimental + */ + + +// NOTE: I think we could get a more compact FST if, instead +// of adding the same input multiple times with a different +// output each time, we added it only once with a +// pre-constructed List output. This way the "multiple +// values" is fully opaque to the Builder/FST. It would +// require implementing the full algebra using set +// arithmetic (I think?); maybe SetOfOutputs is a good name. + +@SuppressWarnings("unchecked") +public final class ListOfOutputs extends Outputs { + + private final Outputs outputs; + + public ListOfOutputs(Outputs outputs) { + this.outputs = outputs; + } + + @Override + public Object common(Object output1, Object output2) { + // These will never be a list: + return outputs.common((T) output1, (T) output2); + } + + @Override + public Object subtract(Object object, Object inc) { + // These will never be a list: + return outputs.subtract((T) object, (T) inc); + } + + @Override + public Object add(Object prefix, Object output) { + assert !(prefix instanceof List); + if (!(output instanceof List)) { + return outputs.add((T) prefix, (T) output); + } else { + List outputList = (List) output; + List addedList = new ArrayList(outputList.size()); + for(T _output : outputList) { + addedList.add(outputs.add((T) prefix, _output)); + } + return addedList; + } + } + + @Override + public void write(Object output, DataOutput out) throws IOException { + assert !(output instanceof List); + outputs.write((T) output, out); + } + + @Override + public void writeFinalOutput(Object output, DataOutput out) throws IOException { + if (!(output instanceof List)) { + out.writeVInt(1); + outputs.write((T) output, out); + } else { + List outputList = (List) output; + out.writeVInt(outputList.size()); + for(T eachOutput : outputList) { + outputs.write(eachOutput, out); + } + } + } + + @Override + public Object read(DataInput in) 
throws IOException { + return outputs.read(in); + } + + @Override + public Object readFinalOutput(DataInput in) throws IOException { + int count = in.readVInt(); + if (count == 1) { + return outputs.read(in); + } else { + List outputList = new ArrayList(count); + for(int i=0;i outputList = (List) output; + + StringBuilder b = new StringBuilder(); + b.append('['); + + for(int i=0;i 0) { + b.append(", "); + } + b.append(outputs.outputToString(outputList.get(i))); + } + b.append(']'); + return b.toString(); + } + } + + @Override + public Object merge(Object first, Object second) { + List outputList = new ArrayList(); + if (!(first instanceof List)) { + outputList.add((T) first); + } else { + outputList.addAll((List) first); + } + if (!(second instanceof List)) { + outputList.add((T) second); + } else { + outputList.addAll((List) second); + } + //System.out.println("MERGE: now " + outputList.size() + " first=" + outputToString(first) + " second=" + outputToString(second)); + //System.out.println(" return " + outputToString(outputList)); + return outputList; + } + + @Override + public String toString() { + return "OneOrMoreOutputs(" + outputs + ")"; + } + + public List asList(Object output) { + if (!(output instanceof List)) { + List result = new ArrayList(1); + result.add((T) output); + return result; + } else { + return (List) output; + } + } +} Property changes on: lucene/core/src/java/org/apache/lucene/util/fst/ListOfOutputs.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property