diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
index 9616e6e..8012e05 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
@@ -141,7 +141,9 @@ public final class Bytes {
     } else if (mode == Mode.DEREF) {
       return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
     } else if (mode == Mode.SORTED) {
-      return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
+
+      return new FSTSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio, true);
+      //return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
     }
   } else {
     if (mode == Mode.STRAIGHT) {
@@ -149,7 +151,8 @@ public final class Bytes {
     } else if (mode == Mode.DEREF) {
       return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
     } else if (mode == Mode.SORTED) {
-      return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
+      return new FSTSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio, false);
+//      return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
     }
   }
 
@@ -192,7 +195,9 @@ public final class Bytes {
     } else if (mode == Mode.DEREF) {
       return new FixedDerefBytesImpl.FixedDerefReader(dir, id, maxDoc, context);
     } else if (mode == Mode.SORTED) {
-      return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context, Type.BYTES_FIXED_SORTED, sortComparator);
+      return new FSTSortedBytesImpl.Reader(dir, id, maxDoc, context, Type.BYTES_FIXED_SORTED, sortComparator);
+      //return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context, Type.BYTES_FIXED_SORTED, sortComparator);
+
     }
   } else {
     if (mode == Mode.STRAIGHT) {
@@ -200,7 +205,9 @@ public final class Bytes {
     } else if (mode == Mode.DEREF) {
       return new VarDerefBytesImpl.VarDerefReader(dir, id, maxDoc, context);
     } else if (mode == Mode.SORTED) {
-      return new VarSortedBytesImpl.Reader(dir, id, maxDoc,context, Type.BYTES_VAR_SORTED, sortComparator);
+      //return new VarSortedBytesImpl.Reader(dir, id, maxDoc,context, Type.BYTES_VAR_SORTED, sortComparator);
+      return new FSTSortedBytesImpl.Reader(dir, id, maxDoc, context, Type.BYTES_VAR_SORTED, sortComparator);
+
     }
   }
 
@@ -453,14 +460,16 @@
             * RamUsageEstimator.NUM_BYTES_INT);
       }
       assert size >= 0;
-      BytesRef ref = new BytesRef(size);
-      ref.length = size;
-      int ord = hash.add(ref);
-      if (ord < 0) {
-        ord = (-ord) - 1;
-      }
-      for (int i = lastDocId+1; i < docID; i++) {
-        docToEntry[i] = ord;
+      if (lastDocId+1 < docID) {
+        BytesRef ref = new BytesRef(size);
+        ref.length = size;
+        int ord = hash.add(ref);
+        if (ord < 0) {
+          ord = (-ord) - 1;
+        }
+        for (int i = lastDocId+1; i < docID; i++) {
+          docToEntry[i] = ord;
+        }
       }
     }
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FSTSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FSTSortedBytesImpl.java
new file mode 100644
index 0000000..b75b337
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FSTSortedBytesImpl.java
@@ -0,0 +1,292 @@
+package org.apache.lucene.codecs.lucene40.values;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.codecs.lucene40.values.Bytes.BytesReaderBase;
+import org.apache.lucene.codecs.lucene40.values.Bytes.DerefBytesWriterBase;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.SortedSource;
+import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.SortedBytesMergeUtils;
+import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
+import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
+import org.apache.lucene.index.SortedBytesMergeUtils.ToFSTBytesRefConsumer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FSTEnum;
+import org.apache.lucene.util.fst.FSTEnum.SeekStatus;
+import org.apache.lucene.util.fst.IntsRefFSTEnum;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.PackedInts;
+
+
+/**
+ * @lucene.experimental
+ */
+class FSTSortedBytesImpl {
+
+  static final String CODEC_NAME_IDX = "FSTSortedBytesIdx";
+  static final String CODEC_NAME_DAT = "FSTSortedBytesDat";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static final class Writer extends DerefBytesWriterBase {
+    private final Comparator<BytesRef> comp;
+    private final Type type;
+
+    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
+        Counter bytesUsed, IOContext context, float acceptableOverheadRatio, boolean fixed) throws IOException {
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_FIXED_SORTED);
+      this.comp = comp;
+      if (fixed) {
+        type = Type.BYTES_FIXED_SORTED;
+      } else {
+        size = 0;
+        type = Type.BYTES_VAR_SORTED;
+      }
+    }
+
+    @Override
+    public void merge(MergeState mergeState, DocValues[] docValues)
+        throws IOException {
+      boolean success = false;
+      try {
+        final MergeContext ctx = SortedBytesMergeUtils.init(type, docValues, comp, mergeState.segmentInfo.getDocCount());
+        List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
+        final IndexOutput datOut = getOrCreateDataOut();
+        datOut.writeInt(ctx.sizePerValues);
+        final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new ToFSTBytesRefConsumer(datOut, acceptableOverheadRatio), slices);
+
+        final IndexOutput idxOut = getOrCreateIndexOut();
+        idxOut.writeInt(maxOrd);
+        final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
+            PackedInts.bitsRequired(maxOrd), PackedInts.DEFAULT);
+        for (SortedSourceSlice slice : slices) {
+          slice.writeOrds(ordsWriter);
+        }
+        ordsWriter.finish();
+        success = true;
+      } finally {
+        releaseResources();
+        if (success) {
+          IOUtils.close(getIndexOut(), getDataOut());
+        } else {
+          IOUtils.closeWhileHandlingException(getIndexOut(), getDataOut());
+        }
+
+      }
+    }
+
+    @Override
+    protected void checkSize(BytesRef bytes) {
+      if (type == Type.BYTES_FIXED_SORTED) {
+        super.checkSize(bytes);
+      }
+    }
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    public void finishInternal(int docCount) throws IOException {
+      if (lastDocId+1 < docCount) {
+        fillDefault(docCount);
+      }
+      final IndexOutput datOut = getOrCreateDataOut();
+      final int count = hash.size();
+      final int[] address = new int[count];
+      datOut.writeInt(type == Type.BYTES_VAR_SORTED ? -1 : size);
+      final ToFSTBytesRefConsumer consumer = new ToFSTBytesRefConsumer(datOut, acceptableOverheadRatio);
+      final int[] sortedEntries = hash.sort(comp);
+      // first dump bytes data, recording address as we go
+      final BytesRef spare = new BytesRef(size);
+      for (int i = 0; i < count; i++) {
+        final int e = sortedEntries[i];
+        final BytesRef bytes = hash.get(e, spare);
+        assert type == Type.BYTES_VAR_SORTED || bytes.length == size : bytes.length + " " + size + " " + type;
+        consumer.consume(bytes, i, -1);
+        address[e] = i;
+      }
+      consumer.flush();
+      final IndexOutput idxOut = getOrCreateIndexOut();
+      idxOut.writeInt(count);
+      writeIndex(idxOut, docCount, count, address, docToEntry);
+    }
+  }
+
+  static final class Reader extends BytesReaderBase {
+    private final int size;
+    private final int valueCount;
+    private final Comparator<BytesRef> comparator;
+
+    public Reader(Directory dir, String id, int maxDoc, IOContext context,
+        Type type, Comparator<BytesRef> comparator) throws IOException {
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, type);
+      size = datIn.readInt();
+      valueCount = idxIn.readInt();
+      this.comparator = comparator;
+    }
+
+    @Override
+    public Source load() throws IOException {
+      return new FSTSortedSource(cloneData(), cloneIndex(), valueCount,
+          comparator);
+    }
+
+    @Override
+    public Source getDirectSource() throws IOException {
+      return this.getSource(); //nocommit doesn't support direct source for now
+    }
+
+    @Override
+    public int getValueSize() {
+      return size;
+    }
+  }
+
+  static final class FSTSortedSource extends SortedSource {
+    private final int valueCount;
+    private final PackedInts.Reader docToOrdIndex;
+    private final FST<Long> fst;
+
+    FSTSortedSource(IndexInput datIn, IndexInput idxIn,
+        int numValues, Comparator<BytesRef> comp) throws IOException {
+      super(Type.BYTES_FIXED_SORTED, comp);
+      docToOrdIndex = PackedInts.getReader(idxIn);
+      fst = new FST<Long>(datIn, PositiveIntOutputs.getSingleton(true));
+      this.valueCount = numValues;
+      IOUtils.close(datIn, idxIn);
+    }
+
+    @Override
+    public int getValueCount() {
+      return valueCount;
+    }
+
+    @Override
+    public boolean hasPackedDocToOrd() {
+      return true;
+    }
+
+    @Override
+    public PackedInts.Reader getDocToOrd() {
+      return docToOrdIndex;
+    }
+
+    @Override
+    public int ord(int docID) {
+      assert docToOrdIndex.get(docID) < getValueCount();
+      return (int) docToOrdIndex.get(docID);
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      try {
+        final IntsRef ic = Util.getByOutput(fst, ord);
+        assert ic != null : "ord=" + ord;
+        assert bytesRef != null;
+        return Util.toBytesRef(ic, bytesRef);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public int getOrdByValue(BytesRef value, BytesRef spare) {
+      try {
+        //NOCOMMIT
+        // phew this seems costly! a FSTEnum per lookup. We should reuse somehow
+        // it's odd that we have a spare we don't use. it seems like we need to fix this
+        // interface to either take an extensible "spare" or keep things in a ThreadLocal?
+        Lookup<Long> lookup = new Lookup<Long>(fst);
+        SeekStatus seek = lookup.seekCeil(value);
+        switch (seek) {
+        case END:
+          return -(valueCount+1);
+        case FOUND:
+          return lookup.getOutput().intValue();
+        case NOT_FOUND:
+          return -(lookup.getOutput().intValue()+1);
+        default:
+          throw new IllegalStateException("unknown seek status: " + seek);
+        }
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
+  private static class Lookup<T> extends FSTEnum<T> {
+    private final BytesRef current = new BytesRef(10);
+    BytesRef target;
+    protected Lookup(FST<T> fst) {
+      super(fst);
+    }
+
+    @Override
+    protected int getCurrentLabel() {
+      // current.offset fixed at 1
+      return current.bytes[upto] & 0xFF;
+    }
+
+    @Override
+    protected void setCurrentLabel(int label) {
+      current.bytes[upto] = (byte) label;
+    }
+
+
+    public SeekStatus seekCeil(BytesRef target) throws IOException {
+      this.target = target;
+      targetLength = target.length;
+      return doSeekCeil();
+
+    }
+
+    public T getOutput() {
+      return output[upto];
+    }
+
+    @Override
+    protected void grow() {
+      current.bytes = ArrayUtil.grow(current.bytes, upto+1);
+    }
+
+    @Override
+    protected int getTargetLabel() {
+      if (upto-1 == target.length) {
+        return FST.END_LABEL;
+      } else {
+        return target.bytes[target.offset + upto - 1] & 0xFF;
+      }
+    }
+
+  }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
index f658fac..22f8b31 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@@ -445,6 +445,8 @@ public class MultiDocValues extends DocValues {
           ordToOffset[ord+1] = offset;
         }
       }
+      @Override
+      public void flush() {}
     }
 
   }
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java b/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
index 2a93ea0..a531a73 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
@@ -28,8 +28,13 @@ import org.apache.lucene.index.DocValues.Type;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.fst.FST;
 
 /**
  * @lucene.internal
@@ -164,6 +169,7 @@ public final class SortedBytesMergeUtils {
       consumer.consume(currentMergedBytes, merger.currentOrd, offset);
       merger.pushTop();
     }
+    consumer.flush();
     ctx.offsets = offsets;
     assert offsets == null || offsets[merger.currentOrd - 1] == offset;
     return merger.currentOrd;
@@ -171,6 +177,7 @@
 
   public static interface BytesRefConsumer {
     public void consume(BytesRef ref, int ord, long offset) throws IOException;
+    public void flush() throws IOException;
   }
 
   public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
@@ -185,7 +192,40 @@
       datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
           currentMergedBytes.length);
     }
+
+    @Override
+    public void flush() {}
   }
+
+  public static final class ToFSTBytesRefConsumer implements BytesRefConsumer {
+    private final IndexOutput datOut;
+    private final PositiveIntOutputs fstOutputs;
+    private final Builder<Long> builder;
+    private final float acceptableOverheadRatio;
+    public ToFSTBytesRefConsumer(IndexOutput datOut, float acceptableOverheadRatio) {
+      this.datOut = datOut;
+      this.fstOutputs = PositiveIntOutputs.getSingleton(true);
+      builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, fstOutputs, null, true);
+      this.acceptableOverheadRatio = acceptableOverheadRatio;
+    }
+
+    @Override
+    public void consume(BytesRef currentMergedBytes, int ord, long offset) throws IOException {
+      final IntsRef scratchIntsRef = new IntsRef();
+      builder.add(Util.toIntsRef(currentMergedBytes, scratchIntsRef), (long) ord);
+
+    }
+
+    @Override
+    public void flush() throws IOException {
+      FST<Long> fst = builder.finish();
+      //nocommit - are those values ok?
+      FST<Long> packed = fst.pack(3, 1000000, acceptableOverheadRatio);
+      packed.save(datOut);
+
+    }
+  }
+
   private static final class RecordMerger {
     private final MergeQueue queue;
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
index d4806fb..fa61888 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
@@ -24,10 +24,11 @@ import java.io.IOException;
 
 /** Can next() and advance() through the terms in an FST
   *
- * @lucene.experimental
-*/
+ * @lucene.experimental
+ * @lucene.internal
+ */
 
-abstract class FSTEnum<T> {
+public abstract class FSTEnum<T> {
   protected final FST<T> fst;
 
   @SuppressWarnings({"rawtypes","unchecked"}) protected FST.Arc<T>[] arcs = new FST.Arc[10];
@@ -115,7 +116,7 @@ abstract class FSTEnum<T> {
   // SEEK_END)? saves the eq check above?
 
   /** Seeks to smallest term that's >= target. */
-  protected void doSeekCeil() throws IOException {
+  protected SeekStatus doSeekCeil() throws IOException {
 
     //System.out.println("  advance len=" + target.length + " curlen=" + current.length);
 
@@ -178,7 +179,7 @@
           assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
           output[upto] = fst.outputs.add(output[upto-1], arc.output);
           if (targetLabel == FST.END_LABEL) {
-            return;
+            return SeekStatus.FOUND;
           }
           setCurrentLabel(arc.label);
           incr();
@@ -195,14 +196,14 @@
           upto--;
           while(true) {
             if (upto == 0) {
-              return;
+              return SeekStatus.END;
             }
             final FST.Arc<T> prevArc = getArc(upto);
             //System.out.println("  rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
             if (!prevArc.isLast()) {
               fst.readNextArc(prevArc, fstReader);
               pushFirst();
-              return;
+              return SeekStatus.NOT_FOUND;
             }
             upto--;
           }
@@ -211,7 +212,7 @@
           fst.readNextRealArc(arc, in);
           assert arc.label > targetLabel;
           pushFirst();
-          return;
+          return SeekStatus.NOT_FOUND;
         }
       } else {
         // Arcs are not array'd -- must do linear scan:
@@ -219,7 +220,7 @@
           // recurse
           output[upto] = fst.outputs.add(output[upto-1], arc.output);
           if (targetLabel == FST.END_LABEL) {
-            return;
+            return SeekStatus.FOUND;
           }
           setCurrentLabel(arc.label);
           incr();
@@ -227,21 +228,21 @@
           targetLabel = getTargetLabel();
         } else if (arc.label > targetLabel) {
           pushFirst();
-          return;
+          return SeekStatus.NOT_FOUND;
         } else if (arc.isLast()) {
           // Dead end (target is after the last arc);
           // rollback to last fork then push
           upto--;
           while(true) {
             if (upto == 0) {
-              return;
+              return SeekStatus.END;
             }
             final FST.Arc<T> prevArc = getArc(upto);
             //System.out.println("  rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
             if (!prevArc.isLast()) {
               fst.readNextArc(prevArc, fstReader);
               pushFirst();
-              return;
+              return SeekStatus.NOT_FOUND;
             }
             upto--;
           }
@@ -257,7 +258,7 @@
   // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
   // SEEK_END)? saves the eq check above?
   /** Seeks to largest term that's <= target. */
-  protected void doSeekFloor() throws IOException {
+  protected SeekStatus doSeekFloor() throws IOException {
 
     // TODO: possibly caller could/should provide common
     // prefix length?  ie this work may be redundant if
@@ -318,7 +319,7 @@
           assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
           output[upto] = fst.outputs.add(output[upto-1], arc.output);
           if (targetLabel == FST.END_LABEL) {
-            return;
+            return SeekStatus.FOUND;
           }
           setCurrentLabel(arc.label);
           incr();
@@ -343,11 +344,11 @@
               fst.readNextArc(arc, fstReader);
             }
             pushLast();
-            return;
+            return SeekStatus.NOT_FOUND;
           }
           upto--;
           if (upto == 0) {
-            return;
+            return SeekStatus.END;
           }
           targetLabel = getTargetLabel();
           arc = getArc(upto);
@@ -360,7 +361,7 @@
           assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel;
           assert arc.label < targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel;
           pushLast();
-          return;
+          return SeekStatus.NOT_FOUND;
         }
       } else {
 
@@ -368,7 +369,7 @@
           // Match -- recurse
           output[upto] = fst.outputs.add(output[upto-1], arc.output);
           if (targetLabel == FST.END_LABEL) {
-            return;
+            return SeekStatus.FOUND;
           }
           setCurrentLabel(arc.label);
           incr();
@@ -390,11 +391,11 @@
               fst.readNextArc(arc, fstReader);
             }
             pushLast();
-            return;
+            return SeekStatus.NOT_FOUND;
           }
           upto--;
           if (upto == 0) {
-            return;
+            return SeekStatus.END;
           }
           targetLabel = getTargetLabel();
           arc = getArc(upto);
@@ -403,14 +404,14 @@
          //System.out.println("  check next label=" + fst.readNextArcLabel(arc) + " (" + (char) fst.readNextArcLabel(arc) + ")");
          if (fst.readNextArcLabel(arc, fstReader) > targetLabel) {
            pushLast();
-           return;
+           return SeekStatus.NOT_FOUND;
          } else {
            // keep scanning
            fst.readNextArc(arc, fstReader);
          }
        } else {
          pushLast();
-          return;
+          return SeekStatus.NOT_FOUND;
        }
      }
    }
@@ -526,4 +527,7 @@
     }
     return arcs[idx];
   }
+
+  public static enum SeekStatus {END, FOUND, NOT_FOUND};
+
 }
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java
index 6eb3346..3a0f3ff 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java
@@ -62,6 +62,43 @@ public class TestDocValues extends LuceneTestCase {
     runTestBytes(Bytes.Mode.SORTED, true);
     runTestBytes(Bytes.Mode.SORTED, false);
   }
+
+  public void simpleSortedBytesTest() throws IOException {
+    Directory dir = newDirectory();
+    final Counter trackBytes = Counter.newCounter();
+    final boolean fixed = random().nextBoolean();
+    DocValuesConsumer w = Bytes.getWriter(dir, "test", Bytes.Mode.SORTED,
+        fixed, COMP, trackBytes, newIOContext(random()), random().nextFloat()
+            * PackedInts.FAST);
+    DocValueHolder valueHolder = new DocValueHolder();
+
+    valueHolder.bytes = new BytesRef("aaa");
+    w.add(0, valueHolder);
+    valueHolder.bytes = new BytesRef("aac");
+    w.add(1, valueHolder);
+    valueHolder.bytes = new BytesRef("acb");
+    w.add(2, valueHolder);
+    w.finish(3);
+    DocValues r = Bytes.getValues(dir, "test", Bytes.Mode.SORTED, fixed, 3,
+        COMP, newIOContext(random()));
+    SortedSource asSortedSource = r.getSource().asSortedSource();
+    assertEquals(new BytesRef("aaa"),
+        asSortedSource.getByOrd(0, new BytesRef()));
+    assertEquals(new BytesRef("aac"),
+        asSortedSource.getByOrd(1, new BytesRef()));
+    assertEquals(new BytesRef("acb"),
+        asSortedSource.getByOrd(2, new BytesRef()));
+    assertEquals(-1, asSortedSource.getOrdByValue(new BytesRef(""), null));
+    assertEquals(-1, asSortedSource.getOrdByValue(new BytesRef("AAA"), null));
+    assertEquals(0, asSortedSource.getOrdByValue(new BytesRef("aaa"), null));
+    assertEquals(-2, asSortedSource.getOrdByValue(new BytesRef("aab"), null));
+    assertEquals(2, asSortedSource.getOrdByValue(new BytesRef("acb"), null));
+    assertEquals(-3, asSortedSource.getOrdByValue(new BytesRef("abb"), null));
+    assertEquals(-4, asSortedSource.getOrdByValue(new BytesRef("bbb"), null));
+
+    r.close();
+    dir.close();
+  }
 
   public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize)
       throws IOException {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
index 1dab8a8..275f4de 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
@@ -808,16 +808,18 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     if (rarely()) {
       w.commit();
     }
-    int numDocsNoValue = atLeast(10);
+    int numDocsNoValue = random().nextBoolean() ? 0 : atLeast(10);
     for (int i = 0; i < numDocsNoValue; i++) {
       Document doc = new Document();
       doc.add(newTextField("id", "noValue", Field.Store.YES));
       w.addDocument(doc);
     }
-    BytesRef bytesRef = new BytesRef(fixed ? len : 0);
-    bytesRef.offset = 0;
-    bytesRef.length = fixed ? len : 0;
-    hash.add(bytesRef); // add empty value for the gaps
+    if (numDocsNoValue > 0) {
+      BytesRef bytesRef = new BytesRef(fixed ? len : 0);
+      bytesRef.offset = 0;
+      bytesRef.length = fixed ? len : 0;
+      hash.add(bytesRef); // add empty value for the gaps
+    }
     if (rarely()) {
       w.commit();
     }