comparator) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+
+ /**
+ * Source of integer (returned as java long), per document. The underlying
+ * implementation may use different numbers of bits per value; long is only
+ * used since it can handle all precisions.
+ */
+  public static abstract class Source implements ConsumesRAM {
+
+    /** Returns the integer value for the given doc, widened to long.
+     *  Impls that do not store ints throw UnsupportedOperationException. */
+    public long ints(int docID) {
+      throw new UnsupportedOperationException("ints are not supported");
+    }
+
+    /** Returns the floating point value for the given doc (as double).
+     *  Impls that do not store floats throw UnsupportedOperationException. */
+    public double floats(int docID) {
+      throw new UnsupportedOperationException("floats are not supported");
+    }
+
+    /** Returns the byte[] value for the given doc.
+     *  Impls that do not store bytes throw UnsupportedOperationException. */
+    public BytesRef bytes(int docID) {
+      throw new UnsupportedOperationException("bytes are not supported");
+    }
+
+    /** Returns number of unique values. Some impls may
+     * throw UnsupportedOperationException. */
+    public int getValueCount() {
+      throw new UnsupportedOperationException();
+    }
+
+    /** Returns an enum over this in-memory source, using no AttributeSource. */
+    public ValuesEnum getEnum() throws IOException{
+      return getEnum(null);
+    }
+
+    // nocommit - enable obtaining enum from source since this is already in memory
+    public /*abstract*/ ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+  }
+
+  public static abstract class SortedSource extends Source {
+
+    @Override
+    public BytesRef bytes(int docID) {
+      // resolve the doc's ord, then look up the (sorted, deduped) value
+      return getByOrd(ord(docID));
+    }
+
+    /**
+     * Returns ord for specified docID. If this docID had not been added to the
+     * Writer, the ord is 0. Ord is dense, ie, starts at 0, then increments by 1
+     * for the next (as defined by the {@link Comparator}) value.
+     */
+    public abstract int ord(int docID);
+
+    /** Returns value for specified ord. */
+    public abstract BytesRef getByOrd(int ord);
+
+    /** Result holder for {@link #getByValue}; may be reused across calls. */
+    public static class LookupResult {
+      // true when ord is an exact match for the requested value
+      public boolean found;
+      public int ord;
+    }
+
+    /**
+     * Finds the largest ord whose value is &lt;= the requested value. If
+     * {@link LookupResult#found} is true, then ord is an exact match. The
+     * returned {@link LookupResult} may be reused across calls.
+     */
+    public abstract LookupResult getByValue(BytesRef value);
+  }
+
+}
Property changes on: src/java/org/apache/lucene/index/values/Reader.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/index/values/Values.java
===================================================================
--- src/java/org/apache/lucene/index/values/Values.java (revision 0)
+++ src/java/org/apache/lucene/index/values/Values.java (revision 0)
@@ -0,0 +1,48 @@
+package org.apache.lucene.index.values;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Controls whether per-field values are stored into
+ * index. This storage is non-sparse, so it's best to
+ * use this when all docs have the field, and loads all
+ * values into RAM, exposing a random access API, when
+ * loaded.
+ *
+ * NOTE: This feature is experimental and the
+ * API is free to change in non-backwards-compatible ways. */
+public enum Values {
+
+  /** Integral value is stored as packed ints. The bit
+   * precision is fixed across the segment, and
+   * determined by the min/max values in the field. */
+  PACKED_INTS,
+  /** Integral value stored as packed ints.
+   *  NOTE(review): distinction from PACKED_INTS (presumably a pre-chosen,
+   *  fixed bit width) is not evident from this patch -- confirm. */
+  PACKED_INTS_FIXED,
+  /** Floating point value stored in 4 bytes (float precision). */
+  SIMPLE_FLOAT_4BYTE,
+  /** Floating point value stored in 8 bytes (double precision). */
+  SIMPLE_FLOAT_8BYTE,
+
+  // nocommit -- shouldn't lucene decide/detect straight vs
+  // deref, as well fixed vs var?
+  /** Fixed-length byte[] per document, stored inline (no sharing). */
+  BYTES_FIXED_STRAIGHT,
+  /** Fixed-length byte[]; docs with identical values share one stored copy. */
+  BYTES_FIXED_DEREF,
+  /** Fixed-length byte[], deduplicated and stored in sorted order. */
+  BYTES_FIXED_SORTED,
+
+  /** Variable-length byte[] per document, stored inline (no sharing). */
+  BYTES_VAR_STRAIGHT,
+  /** Variable-length byte[]; docs with identical values share one stored copy. */
+  BYTES_VAR_DEREF,
+  /** Variable-length byte[], deduplicated and stored in sorted order. */
+  BYTES_VAR_SORTED
+
+  // nocommit -- need STRING variants as well
+}
Property changes on: src/java/org/apache/lucene/index/values/Values.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/index/values/ValuesAttribute.java
===================================================================
--- src/java/org/apache/lucene/index/values/ValuesAttribute.java (revision 0)
+++ src/java/org/apache/lucene/index/values/ValuesAttribute.java (revision 0)
@@ -0,0 +1,34 @@
+package org.apache.lucene.index.values;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Comparator;
+
+import org.apache.lucene.index.values.ValuesEnum.FloatsRef;
+import org.apache.lucene.index.values.ValuesEnum.IntsRef;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Attribute carrying the per-document value (one of ints, floats or bytes,
+ * depending on {@link Values} type) for a {@link ValuesEnum}.
+ */
+public interface ValuesAttribute extends Attribute {
+  /** Returns the value type this attribute carries. */
+  public Values type();
+  /** Byte[] value holder; null unless type is a BYTES_* variant. */
+  public BytesRef bytes();
+  /** Float value holder; null unless type is a SIMPLE_FLOAT_* variant. */
+  public FloatsRef floats();
+  /** Int value holder; null unless type is a PACKED_INTS* variant. */
+  public IntsRef ints();
+  /** Sets the value type; implementations allocate the matching holder. */
+  public void setType(Values type);
+  // generics restored: the surrounding patch demonstrably lost <...> spans
+  public Comparator<BytesRef> bytesComparator();
+  public void setBytesComparator(Comparator<BytesRef> comp);
+}
\ No newline at end of file
Property changes on: src/java/org/apache/lucene/index/values/ValuesAttribute.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java (revision 0)
+++ src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java (revision 0)
@@ -0,0 +1,126 @@
+package org.apache.lucene.index.values;
+
+import java.util.Comparator;
+
+import org.apache.lucene.index.values.ValuesEnum.FloatsRef;
+import org.apache.lucene.index.values.ValuesEnum.IntsRef;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SetOnce;
+
+/**
+ * Default {@link ValuesAttribute} implementation. The type is write-once;
+ * setting it allocates the matching value holder (bytes, ints or floats).
+ */
+public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribute {
+  // write-once holder for the value type (generics restored; patch lost <...>)
+  private final SetOnce<Values> type = new SetOnce<Values>();
+  private BytesRef bytes = null;
+  private FloatsRef floats = null;
+  private IntsRef ints = null;
+  private Comparator<BytesRef> bytesComp;
+
+  public BytesRef bytes() {
+    return bytes;
+  }
+
+  public FloatsRef floats() {
+    return floats;
+  }
+
+  public IntsRef ints() {
+    return ints;
+  }
+
+  public Values type() {
+    return type.get();
+  }
+
+  /** Sets the type (once) and allocates the holder matching the type family. */
+  public void setType(Values type) {
+    this.type.set(type);
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      bytes = new BytesRef();
+      break;
+    case PACKED_INTS:
+    case PACKED_INTS_FIXED:
+      ints = new IntsRef();
+      break;
+    case SIMPLE_FLOAT_4BYTE:
+    case SIMPLE_FLOAT_8BYTE:
+      floats = new FloatsRef();
+      break;
+    }
+  }
+
+  @Override
+  public void clear() {
+    // TODO: reset the per-document value holders
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    ValuesAttributeImpl other = (ValuesAttributeImpl) target;
+    // nocommit - we may get rid of setOnce here
+    other.setType(type.get());
+    // also propagate the comparator so the copy is complete
+    other.bytesComp = bytesComp;
+  }
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 0;
+    result = prime * result + ((bytes == null) ? 0 : bytes.hashCode());
+    result = prime * result + ((floats == null) ? 0 : floats.hashCode());
+    result = prime * result + ((ints == null) ? 0 : ints.hashCode());
+    // hash the stored enum value, not the SetOnce wrapper (which does not
+    // override hashCode and would make equal attributes hash differently)
+    final Values t = type.get();
+    result = prime * result + ((t == null) ? 0 : t.hashCode());
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    // null guard added: previously equals(null) threw NullPointerException
+    if (obj == null || getClass() != obj.getClass())
+      return false;
+    ValuesAttributeImpl other = (ValuesAttributeImpl) obj;
+    if (bytes == null) {
+      if (other.bytes != null)
+        return false;
+    } else if (!bytes.equals(other.bytes))
+      return false;
+    if (floats == null) {
+      if (other.floats != null)
+        return false;
+    } else if (!floats.equals(other.floats))
+      return false;
+    if (ints == null) {
+      if (other.ints != null)
+        return false;
+    } else if (!ints.equals(other.ints))
+      return false;
+    // compare stored enum values; SetOnce does not override equals, so the
+    // previous wrapper comparison made distinct instances never equal
+    if (type.get() != other.type.get())
+      return false;
+    return true;
+  }
+
+  public Comparator<BytesRef> bytesComparator() {
+    return bytesComp;
+  }
+
+  public void setBytesComparator(Comparator<BytesRef> comp) {
+    bytesComp = comp;
+  }
+
+}
Property changes on: src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/index/values/ValuesEnum.java
===================================================================
--- src/java/org/apache/lucene/index/values/ValuesEnum.java (revision 0)
+++ src/java/org/apache/lucene/index/values/ValuesEnum.java (revision 0)
@@ -0,0 +1,109 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Iterates per-document values as a {@link DocIdSetIterator}; the current
+ * value is exposed through a {@link ValuesAttribute} on the (possibly shared)
+ * AttributeSource.
+ */
+public abstract class ValuesEnum extends DocIdSetIterator {
+  private AttributeSource source;
+  protected final ValuesAttribute attr;
+
+  protected ValuesEnum(Values enumType) {
+    this(null, enumType);
+  }
+
+  protected ValuesEnum(AttributeSource source, Values enumType) {
+    this.source = source;
+    // only set the type if the shared source did not already carry one
+    boolean setType = !hasAttribute(ValuesAttribute.class);
+    attr = addAttribute(ValuesAttribute.class);
+    if (setType)
+      attr.setType(enumType);
+  }
+
+  /** Returns the AttributeSource, lazily creating a private one. */
+  public AttributeSource attributes() {
+    if (source == null)
+      source = new AttributeSource();
+    return source;
+  }
+
+  // generic signatures restored: the patch lost the <...> spans, leaving an
+  // undeclared type variable T
+  public <T extends Attribute> T addAttribute(Class<T> attr) {
+    return attributes().addAttribute(attr);
+  }
+
+  public <T extends Attribute> T getAttribute(Class<T> attr) {
+    return attributes().getAttribute(attr);
+  }
+
+  public boolean hasAttribute(Class<? extends Attribute> attr) {
+    return attributes().hasAttribute(attr);
+  }
+
+  /** Releases any resources (e.g. open files) held by this enum. */
+  public abstract void close() throws IOException;
+
+  // nocommit - factor those out to utils and add common methods like other *Refs provide
+  // nocommit - this should be LongRef I guess as IntsRef already exists
+  /** Holder for a single long value at position {@code pos} in {@code val}. */
+  public static class IntsRef {
+    public int pos;
+    public long[] val;
+
+    public IntsRef() {
+      this(new long[1], 0);
+    }
+
+    public IntsRef(long[] val, int pos) {
+      this.val = val;
+      this.pos = pos;
+    }
+
+    public long get() {
+      return val[pos];
+    }
+
+    public void set(long value) {
+      val[pos] = value;
+    }
+  }
+
+  // nocommit - factor those out to utils and add common methods like other *Refs provide
+  /** Holder for a single double value at position {@code pos} in {@code val}. */
+  public static class FloatsRef {
+    public int pos;
+    public double[] val;
+
+    public FloatsRef() {
+      this(new double[1], 0);
+    }
+
+    public FloatsRef(double[] val, int pos) {
+      this.val = val;
+      this.pos = pos;
+    }
+
+    public double get() {
+      return val[pos];
+    }
+
+    public void set(double value) {
+      val[pos] = value;
+    }
+  }
+
+}
Property changes on: src/java/org/apache/lucene/index/values/ValuesEnum.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java (revision 0)
+++ src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java (revision 0)
@@ -0,0 +1,231 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesHash;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Stores variable-length byte[] by deref, ie when two docs
+// have the same value, they store only 1 byte[] and both
+// docs reference that single source
+
+class VarDerefBytesImpl {
+
+ static final String CODEC_NAME = "VarDerefBytes";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+ static class Writer extends BytesWriterBase {
+ private int[] docToAddress;
+ private int address = 1;
+
+    final class Entry extends BytesHash.Entry {
+      // 1-based address of this value's length-prefixed bytes in the data
+      // file; 0 means "not yet written" (address counter starts at 1)
+      int address;
+    }
+
+    private final BytesHash hash = new BytesHash(Entry.class) {
+      @Override
+      protected VarDerefBytesImpl.Writer.Entry newEntry() {
+        return new VarDerefBytesImpl.Writer.Entry();
+      }
+      @Override
+      public long bytesPerEntry() {
+        // account for the extra int (address) carried by each Entry
+        return super.bytesPerEntry() + RamUsageEstimator.NUM_BYTES_INT;
+      }
+    };
+
+    public Writer(Directory dir, String id) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false);
+      docToAddress = new int[1];
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if(bytes.length == 0)
+        return; // default
+      if(datOut == null)
+        initDataOut();
+      Entry e = hash.add(bytes);
+
+      if (docID >= docToAddress.length) {
+        docToAddress = ArrayUtil.grow(docToAddress, 1+docID);
+      }
+      if (e.address == 0) {
+        // first occurrence of this value: record its address and append
+        // length-prefixed bytes to the data file
+        e.address = address;
+        // New
+        if (bytes.length < 128) {
+          // 1 byte to store length
+          datOut.writeByte((byte) bytes.length);
+          address++;
+        } else {
+          // 2 byte to store length
+          // NOTE(review): only bits 0..14 of the length survive this
+          // encoding; lengths >= 32768 would be silently corrupted -- verify
+          // an upstream limit enforces this
+          datOut.writeByte((byte) (0x80 | (bytes.length & 0x7f)));
+          datOut.writeByte((byte) ((bytes.length>>7) & 0xff));
+          address += 2;
+        }
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        address += bytes.length;
+      }
+
+      // dedup: every doc with this value points at the same address
+      docToAddress[docID] = e.address;
+    }
+
+    /** RAM used by the doc-to-address map plus the dedup hash. */
+    synchronized public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_INT * docToAddress.length + hash.ramBytesUsed();
+    }
+
+ // Important that we get docCount, in case there were
+ // some last docs that we didn't see
+ @Override
+ synchronized public void finish(int docCount) throws IOException {
+ if(datOut == null)
+ return;
+ initIndexOut();
+ idxOut.writeInt(address-1);
+
+ // write index
+ // nocommit -- allow forcing fixed array (not -1)
+ // TODO(simonw): check the address calculation / make it more intuitive
+ PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1));
+ final int limit;
+ if (docCount > docToAddress.length) {
+ limit = docToAddress.length;
+ } else {
+ limit = docCount;
+ }
+ for(int i=0;i comp;
+
+    final class Entry extends BytesHash.Entry {
+      // 1-based sorted ord, assigned in finish()
+      int index;
+      // byte offset of this value within the data file, assigned in finish()
+      long offset;
+    }
+
+    private final BytesHash hash = new BytesHash(Entry.class) {
+      @Override
+      protected VarSortedBytesImpl.Writer.Entry newEntry() {
+        return new VarSortedBytesImpl.Writer.Entry();
+      }
+
+      @Override
+      public long bytesPerEntry() {
+        // account for the extra int (index) and long (offset) per Entry
+        return super.bytesPerEntry() + RamUsageEstimator.NUM_BYTES_INT
+            + RamUsageEstimator.NUM_BYTES_LONG;
+      }
+    };
+
+    public Writer(Directory dir, String id, Comparator comp)
+        throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false);
+      this.comp = comp;
+      docToEntry = new Entry[1];
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if(bytes.length == 0)
+        return;// default
+      if (docID >= docToEntry.length) {
+        // grow manually with object-ref sizing (ArrayUtil.grow is for primitives)
+        Entry[] newArray = new Entry[ArrayUtil.oversize(1 + docID,
+            RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+        System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
+        docToEntry = newArray;
+      }
+      // dedup into the hash; actual ords/offsets are assigned in finish()
+      docToEntry[docID] = hash.add(bytes);
+    }
+
+    /** RAM used by the doc-to-entry ref array plus the dedup hash. */
+    synchronized public long ramBytesUsed() {
+      // was NUM_BYTES_OBJ_REF -- inconsistent with the NUM_BYTES_OBJECT_REF
+      // constant used by add() above (and the name RamUsageEstimator declares)
+      return RamUsageEstimator.NUM_BYTES_OBJECT_REF * docToEntry.length
+          + hash.ramBytesUsed();
+    }
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      final int count = hash.size();
+      if (count == 0)
+        return;
+      initIndexOut();
+      initDataOut();
+      Entry[] sortedEntries = hash.sort(comp);
+
+      // first dump bytes data, recording index & offset as
+      // we go
+      long offset = 0;
+      long lastOffset = 0;
+      for (int i = 0; i < count; i++) {
+        final Entry e = sortedEntries[i];
+        e.offset = offset;
+        // ords are 1-based; 0 is reserved for "doc has no value"
+        e.index = 1 + i;
+
+        final BytesRef bytes = hash.getBytes(e);
+        // TODO: we could prefix code...
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        // lastOffset ends as the offset of the final (largest) value -- the
+        // max value the offset index below must be able to represent
+        lastOffset = offset;
+        offset += bytes.length;
+      }
+
+      // total bytes of data
+      idxOut.writeLong(offset);
+
+      // write index -- first doc -> 1+ord
+      // nocommit -- allow not -1:
+      final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut,
+          docCount, PackedInts.bitsRequired(count));
+      final int limit = docCount > docToEntry.length ? docToEntry.length
+          : docCount;
+      for (int i = 0; i < limit; i++) {
+        final Entry e = docToEntry[i];
+        indexWriter.add(e == null ? 0 : e.index);
+      }
+      // docs past the last added one get ord 0 (no value)
+      for (int i = limit; i < docCount; i++) {
+        indexWriter.add(0);
+      }
+      indexWriter.finish();
+
+      // next ord (0-based) -> offset
+      // nocommit -- allow not -1:
+      PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
+          PackedInts.bitsRequired(lastOffset));
+      for (int i = 0; i < count; i++) {
+        offsetWriter.add(sortedEntries[i].offset);
+      }
+      offsetWriter.finish();
+
+      super.finish(docCount);
+    }
+ }
+
+ public static class Reader extends BytesReaderBase {
+
+    Reader(Directory dir, String id, int maxDoc) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+      // NOTE(review): maxDoc is accepted but unused here -- confirm doc count
+      // is intentionally taken from the stored packed index instead
+    }
+
+    /** Loads all values into RAM; the sorted source doubles as the plain one. */
+    @Override
+    public org.apache.lucene.index.values.Reader.Source load()
+        throws IOException {
+      return loadSorted(null);
+    }
+
+    @Override
+    public SortedSource loadSorted(Comparator comp)
+        throws IOException {
+      return new Source(cloneData(), cloneIndex(), comp);
+    }
+
+ private static class Source extends BytesBaseSortedSource {
+ // TODO: paged data
+ private final byte[] data;
+ private final BytesRef bytesRef = new BytesRef();
+ private final PackedInts.Reader docToOrdIndex;
+ private final PackedInts.Reader ordToOffsetIndex; // 0-based
+ private final long totBytes;
+ private final int valueCount;
+ private final LookupResult lookupResult = new LookupResult();
+ private final Comparator comp;
+
+ public Source(IndexInput datIn, IndexInput idxIn,
+ Comparator comp) throws IOException {
+ super(datIn, idxIn);
+ totBytes = idxIn.readLong();
+ data = new byte[(int) totBytes];
+ datIn.readBytes(data, 0, (int) totBytes);
+ docToOrdIndex = PackedInts.getReader(idxIn);
+ ordToOffsetIndex = PackedInts.getReader(idxIn);
+ valueCount = ordToOffsetIndex.size();
+ bytesRef.bytes = data;
+ // default byte sort order
+ this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
+ : comp;
+
+ }
+
+ @Override
+ public BytesRef getByOrd(int ord) {
+ return ord == 0 ? defaultValue : deref(--ord);
+ }
+
+ @Override
+ public int ord(int docID) {
+ return (int) docToOrdIndex.get(docID);
+ }
+
+ @Override
+ public LookupResult getByValue(BytesRef bytes) {
+ return binarySearch(bytes, 0, valueCount - 1);
+ }
+
+ public long ramBytesUsed() {
+ // TODO(simonw): move ram usage to PackedInts?
+ return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+ + data.length
+ + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToOrdIndex
+ .getBitsPerValue()
+ * docToOrdIndex.getBitsPerValue())
+ + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + ordToOffsetIndex
+ .getBitsPerValue()
+ * ordToOffsetIndex.getBitsPerValue());
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ // ord is 0-based
+ private BytesRef deref(int ord) {
+ bytesRef.offset = (int) ordToOffsetIndex.get(ord);
+ final long nextOffset;
+ if (ord == valueCount - 1) {
+ nextOffset = totBytes;
+ } else {
+ nextOffset = ordToOffsetIndex.get(1 + ord);
+ }
+ bytesRef.length = (int) (nextOffset - bytesRef.offset);
+ return bytesRef;
+ }
+
+ // TODO: share w/ FixedSortedBytesValues?
+ private LookupResult binarySearch(BytesRef b, int low, int high) {
+
+ while (low <= high) {
+ int mid = (low + high) >>> 1;
+ deref(mid);
+ final int cmp = comp.compare(bytesRef, b);
+ if (cmp < 0) {
+ low = mid + 1;
+ } else if (cmp > 0) {
+ high = mid - 1;
+ } else {
+ lookupResult.ord = mid + 1;
+ lookupResult.found = true;
+ return lookupResult;
+ }
+ }
+ assert comp.compare(bytesRef, b) != 0;
+ lookupResult.ord = low;
+ lookupResult.found = false;
+ return lookupResult;
+ }
+ }
+
+    /** Returns a disk-backed enum; the passed AttributeSource may be null. */
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+      return new VarSortedBytesEnum(source, cloneData(), cloneIndex());
+    }
+
+    private static class VarSortedBytesEnum extends ValuesEnum {
+
+      private PackedInts.Reader docToOrdIndex;
+      private PackedInts.Reader ordToOffsetIndex;
+      private IndexInput idxIn;
+      private IndexInput datIn;
+      private final BytesRef bytesRef;
+      private int valueCount;
+      private long totBytes;
+      private int docCount;
+      private int pos = -1;
+      // file pointer to the start of the bytes data in datIn
+      private final long fp;
+
+      protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn,
+          IndexInput idxIn) throws IOException {
+        super(source, Values.BYTES_VAR_SORTED);
+        bytesRef = attr.bytes();
+        totBytes = idxIn.readLong();
+        // keep that in memory to prevent lots of disk seeks
+        docToOrdIndex = PackedInts.getReader(idxIn);
+        ordToOffsetIndex = PackedInts.getReader(idxIn);
+        valueCount = ordToOffsetIndex.size();
+        docCount = docToOrdIndex.size();
+        fp = datIn.getFilePointer();
+        this.idxIn = idxIn;
+        this.datIn = datIn;
+      }
+
+      @Override
+      public void close() throws IOException {
+        idxIn.close();
+        datIn.close();
+      }
+
+      @Override
+      public int advance(int target) throws IOException {
+        if (target >= docCount)
+          return pos = NO_MORE_DOCS;
+        // stored ords are 1-based; -1 converts to 0-based, so -1 == no value
+        final int ord = (int) docToOrdIndex.get(target)-1;
+        if(ord == -1) {
+          // doc has no value: expose an empty BytesRef
+          bytesRef.length = 0;
+          bytesRef.offset = 0;
+          return pos = target;
+        }
+        final long offset = ordToOffsetIndex.get(ord);
+        // value ends where the next ord's value starts (or at end of data)
+        final long nextOffset;
+        if (ord == valueCount - 1) {
+          nextOffset = totBytes;
+        } else {
+          nextOffset = ordToOffsetIndex.get(1 + ord);
+        }
+        final int length = (int) (nextOffset - offset);
+        datIn.seek(fp + offset);
+        if (bytesRef.bytes.length < length)
+          bytesRef.grow(length);
+        datIn.readBytes(bytesRef.bytes, 0, length);
+        bytesRef.length = (int) length;
+        bytesRef.offset = 0;
+        return pos = target;
+      }
+
+      @Override
+      public int docID() {
+        return pos;
+      }
+
+      @Override
+      public int nextDoc() throws IOException {
+        return advance(pos+1);
+      }
+    }
+ }
+}
Property changes on: src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java (revision 0)
+++ src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java (revision 0)
@@ -0,0 +1,223 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Variable length byte[] per document, no sharing
+
+class VarStraightBytesImpl {
+
+ static final String CODEC_NAME = "VarStraightBytes";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesWriterBase {
+    // running total of bytes written == start address of the next value
+    private int address;
+    // start at -1 if the first added value is > 0
+    private int lastDocID = -1;
+    private int[] docToAddress;
+
+    public Writer(Directory dir, String id) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false);
+      docToAddress = new int[1];
+    }
+
+    // Fills up to but not including this docID
+    private void fill(final int docID) {
+      if (docID >= docToAddress.length) {
+        docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
+      }
+      // skipped docs all point at the current address (zero-length value)
+      for (int i = lastDocID + 1; i < docID; i++) {
+        docToAddress[i] = address;
+      }
+      lastDocID = docID;
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if(bytes.length == 0)
+        return; // default
+      if (datOut == null)
+        initDataOut();
+      fill(docID);
+      docToAddress[docID] = address;
+      datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+      address += bytes.length;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if (datOut == null)
+        return;
+      initIndexOut();
+      // write all lengths to index
+      // write index
+      fill(docCount);
+      idxOut.writeVInt(address);
+      // nocommit -- allow not -1
+      final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
+          PackedInts.bitsRequired(address));
+      for (int i = 0; i < docCount; i++) {
+        w.add(docToAddress[i]);
+      }
+      w.finish();
+      super.finish(docCount);
+    }
+
+    /** RAM used by the doc-to-address map. */
+    synchronized public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+          + RamUsageEstimator.NUM_BYTES_INT * docToAddress.length;
+    }
+  }
+
+ public static class Reader extends BytesReaderBase {
+ private final int maxDoc;
+
+    Reader(Directory dir, String id, int maxDoc) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+      this.maxDoc = maxDoc;
+    }
+
+    /** Loads all bytes plus the doc-to-address index into RAM. */
+    @Override
+    public Source load() throws IOException {
+      return new Source(cloneData(), cloneIndex());
+    }
+
+    private class Source extends BytesBaseSource {
+      private final int totBytes;
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader addresses;
+
+      public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
+        super(datIn, idxIn);
+        totBytes = idxIn.readVInt();
+        data = new byte[totBytes];
+        datIn.readBytes(data, 0, totBytes);
+        addresses = PackedInts.getReader(idxIn);
+        bytesRef.bytes = data;
+      }
+
+      @Override
+      public BytesRef bytes(int docID) {
+        final int address = (int) addresses.get(docID);
+        bytesRef.offset = address;
+        // value ends at the next doc's start address (or at end of data)
+        if (docID == maxDoc - 1) {
+          bytesRef.length = totBytes - bytesRef.offset;
+        } else {
+          bytesRef.length = (int) addresses.get(1 + docID) - bytesRef.offset;
+        }
+        return bytesRef;
+      }
+
+      @Override
+      public int getValueCount() {
+        // straight storage does not deduplicate, so no unique-value count
+        throw new UnsupportedOperationException();
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move address ram usage to PackedInts?
+        // NOTE(review): getBitsPerValue() * size() is in bits while the other
+        // terms are bytes -- confirm whether this term should be divided by 8
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+            + data.length
+            + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + addresses
+                .getBitsPerValue()
+                * addresses.size());
+      }
+    }
+
+    /** Returns a disk-backed enum; the passed AttributeSource may be null. */
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+      // NOTE(review): "Strainght" is a typo for "Straight" in the enum class name
+      return new VarStrainghtBytesEnum(source, cloneData(), cloneIndex());
+    }
+
+    private class VarStrainghtBytesEnum extends ValuesEnum {
+      // NOTE(review): class name is a typo for "VarStraightBytesEnum"
+      private final PackedInts.Reader addresses;
+      private final IndexInput datIn;
+      private final IndexInput idxIn;
+      // file pointer to the start of the bytes data in datIn
+      private final long fp;
+      private final int totBytes;
+      private final BytesRef ref;
+      private int pos = -1;
+
+      protected VarStrainghtBytesEnum(AttributeSource source, IndexInput datIn,
+          IndexInput idxIn) throws IOException {
+        super(source, Values.BYTES_VAR_STRAIGHT);
+        totBytes = idxIn.readVInt();
+        fp = datIn.getFilePointer();
+        addresses = PackedInts.getReader(idxIn);
+        this.datIn = datIn;
+        this.idxIn = idxIn;
+        ref = attr.bytes();
+
+      }
+
+      @Override
+      public void close() throws IOException {
+        datIn.close();
+        idxIn.close();
+      }
+
+      @Override
+      public int advance(final int target) throws IOException {
+        if (target >= maxDoc) {
+          ref.length = 0;
+          ref.offset = 0;
+          return pos = NO_MORE_DOCS;
+        }
+        final long addr = addresses.get(target);
+        if (addr == totBytes) {
+          // nocommit is that a valid default value
+          // (start address == end-of-data implies a zero-length value)
+          ref.length = 0;
+          ref.offset = 0;
+          return pos = target;
+        }
+        datIn.seek(fp + addr);
+        // length = next doc's start address minus ours (or end of data)
+        final int size = (int) (target == maxDoc - 1 ? totBytes - addr
+            : addresses.get(target + 1) - addr);
+        if (ref.bytes.length < size)
+          ref.grow(size);
+        ref.length = size;
+        datIn.readBytes(ref.bytes, 0, size);
+        return pos = target;
+      }
+
+      @Override
+      public int docID() {
+        return pos;
+      }
+
+      @Override
+      public int nextDoc() throws IOException {
+        return advance(pos+1);
+      }
+    }
+ }
+}
Property changes on: src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/index/values/Writer.java
===================================================================
--- src/java/org/apache/lucene/index/values/Writer.java (revision 0)
+++ src/java/org/apache/lucene/index/values/Writer.java (revision 0)
@@ -0,0 +1,94 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.ConsumesRAM;
+import org.omg.CORBA.NO_MEMORY;
+
+public abstract class Writer implements ConsumesRAM {
+
+  /** Records the specified value for the docID. */
+  public void add(int docID, long value) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Records the specified value for the docID. */
+  public void add(int docID, double value) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Records the specified value for the docID. */
+  public void add(int docID, BytesRef value) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Records a value for the docID, read from the attribute previously
+   *  installed via {@link #setNextAttribute}. */
+  protected abstract void add(int docID) throws IOException;
+
+  /** Sets the attribute that subsequent {@link #add(int)} calls read from. */
+  protected abstract void setNextAttribute(ValuesAttribute attr);
+
+  /** Finish writing, close any files */
+  public abstract void finish(int docCount) throws IOException;
+
+  /** Per-segment state used when merging several readers into this writer. */
+  public static class MergeState {
+    public final Reader reader;
+    public final int docBase;   // first docID of this segment in the merged index
+    public final int docCount;  // number of docs in this segment
+    public final Bits bits;     // deleted docs, or null if none
+
+    public MergeState(Reader reader, int docBase, int docCount, Bits bits) {
+      assert reader != null;
+      this.reader = reader;
+      this.docBase = docBase;
+      this.docCount = docCount;
+      this.bits = bits;
+    }
+  }
+
+  /** Merges all given segment states, in order, into this writer.
+   *  The raw {@code List} would not compile with the enhanced for below,
+   *  so the element type is declared explicitly. */
+  public void add(List<MergeState> states) throws IOException {
+    for (MergeState state : states) {
+      merge(state);
+    }
+  }
+
+  // enables bulk copies in subclasses per MergeState
+  protected void merge(MergeState state) throws IOException {
+    final ValuesEnum valEnum = state.reader.getEnum();
+    assert valEnum != null;
+    try {
+      final ValuesAttribute attr = valEnum.addAttribute(ValuesAttribute.class);
+      setNextAttribute(attr);
+      // docID only advances for live (non-deleted) docs so merged docs stay dense
+      int docID = state.docBase;
+      final Bits bits = state.bits;
+      final int docCount = state.docCount;
+      for (int i = 0; i < docCount; i++) {
+        if (bits == null || !bits.get(i)) {
+          if (valEnum.advance(i) == ValuesEnum.NO_MORE_DOCS)
+            break;
+          add(docID++);
+        }
+      }
+    } finally {
+      valEnum.close();
+    }
+  }
+}
Property changes on: src/java/org/apache/lucene/index/values/Writer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/search/FieldComparator.java
===================================================================
--- src/java/org/apache/lucene/search/FieldComparator.java (revision 983076)
+++ src/java/org/apache/lucene/search/FieldComparator.java (working copy)
@@ -22,6 +22,7 @@
import java.util.Locale;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.Reader.Source;
import org.apache.lucene.search.FieldCache.DoubleParser;
import org.apache.lucene.search.FieldCache.LongParser;
import org.apache.lucene.search.FieldCache.ByteParser;
@@ -143,7 +144,6 @@
* @param reader current reader
* @param docBase docBase of this reader
* @throws IOException
- * @throws IOException
*/
public abstract void setNextReader(IndexReader reader, int docBase) throws IOException;
@@ -318,6 +318,64 @@
}
}
+  /** Uses float index values to sort by ascending value */
+  public static final class FloatIndexValuesComparator extends FieldComparator {
+    private final double[] values;       // per-slot copies of competitive values
+    private Source currentReaderValues;  // values source of the current segment
+    private final String field;
+    private double bottom;               // value in the current bottom slot
+
+    FloatIndexValuesComparator(int numHits, String field) {
+      values = new double[numHits];
+      this.field = field;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+      // Double.compare yields a consistent total order, including NaN;
+      // the hand-rolled >/< comparison would treat NaN as equal to
+      // everything, producing an inconsistent comparator.
+      return Double.compare(values[slot1], values[slot2]);
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+      return Double.compare(bottom, currentReaderValues.floats(doc));
+    }
+
+    @Override
+    public void copy(int slot, int doc) {
+      values[slot] = currentReaderValues.floats(doc);
+    }
+
+    @Override
+    public void setNextReader(IndexReader reader, int docBase) throws IOException {
+      currentReaderValues = reader.getIndexValuesCache().getFloats(field);
+    }
+
+    @Override
+    public void setBottom(final int bottom) {
+      this.bottom = values[bottom];
+    }
+
+    @Override
+    public Comparable value(int slot) {
+      return Double.valueOf(values[slot]);
+    }
+  }
+
/** Parses field's values as float (using {@link
* FieldCache#getFloats} and sorts by ascending value */
public static final class FloatComparator extends FieldComparator {
@@ -452,6 +510,68 @@
}
}
+  /** Loads int index values and sorts by ascending value. */
+  public static final class IntIndexValuesComparator extends FieldComparator {
+    private final long[] values;         // per-slot copies of competitive values
+    private Source currentReaderValues;  // values source of the current segment
+    private final String field;
+    private long bottom;                 // value in the current bottom slot
+
+    IntIndexValuesComparator(int numHits, String field) {
+      values = new long[numHits];
+      this.field = field;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+      // TODO: there are sneaky non-branch ways to compute
+      // -1/+1/0 sign
+      final long a = values[slot1];
+      final long b = values[slot2];
+      if (a == b) {
+        return 0;
+      }
+      return a > b ? 1 : -1;
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+      // TODO: there are sneaky non-branch ways to compute
+      // -1/+1/0 sign
+      final long other = currentReaderValues.ints(doc);
+      if (bottom == other) {
+        return 0;
+      }
+      return bottom > other ? 1 : -1;
+    }
+
+    @Override
+    public void copy(int slot, int doc) {
+      values[slot] = currentReaderValues.ints(doc);
+    }
+
+    @Override
+    public void setNextReader(IndexReader reader, int docBase) throws IOException {
+      currentReaderValues = reader.getIndexValuesCache().getInts(field);
+    }
+
+    @Override
+    public void setBottom(final int bottom) {
+      this.bottom = values[bottom];
+    }
+
+    @Override
+    public Comparable value(int slot) {
+      return Long.valueOf(values[slot]);
+    }
+  }
+
/** Parses field's values as long (using {@link
* FieldCache#getLongs} and sorts by ascending value */
public static final class LongComparator extends FieldComparator {
Index: src/java/org/apache/lucene/search/ReqExclScorer.java
===================================================================
--- src/java/org/apache/lucene/search/ReqExclScorer.java (revision 983076)
+++ src/java/org/apache/lucene/search/ReqExclScorer.java (working copy)
@@ -23,7 +23,7 @@
/** A Scorer for queries with a required subscorer
* and an excluding (prohibited) sub DocIdSetIterator.
*
- * This Scorer implements {@link Scorer#skipTo(int)},
+ * This Scorer implements {@link Scorer#advance(int)},
* and it uses the skipTo() on the given scorers.
*/
class ReqExclScorer extends Scorer {
Index: src/java/org/apache/lucene/search/ReqOptSumScorer.java
===================================================================
--- src/java/org/apache/lucene/search/ReqOptSumScorer.java (revision 983076)
+++ src/java/org/apache/lucene/search/ReqOptSumScorer.java (working copy)
@@ -21,7 +21,7 @@
/** A Scorer for queries with a required part and an optional part.
* Delays skipTo() on the optional part until a score() is needed.
*
- * This Scorer implements {@link Scorer#skipTo(int)}.
+ * This Scorer implements {@link Scorer#advance(int)}.
*/
class ReqOptSumScorer extends Scorer {
/** The scorers passed from the constructor.
Index: src/java/org/apache/lucene/search/SortField.java
===================================================================
--- src/java/org/apache/lucene/search/SortField.java (revision 983076)
+++ src/java/org/apache/lucene/search/SortField.java (working copy)
@@ -19,9 +19,15 @@
import java.io.IOException;
import java.io.Serializable;
+import java.util.Comparator;
import java.util.Locale;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.BytesRef;
+
+// nocommit -- for cleaner transition, maybe we should make
+// a new SortField that subclasses this one and always uses
+// index values?
/**
* Stores information about how to sort documents by terms in an individual
@@ -83,6 +89,9 @@
* uses ordinals to do the sorting. */
public static final int STRING_VAL = 11;
+ /** Sort use byte[] index values. */
+ public static final int BYTES = 12;
+
/** Represents sorting by document score (relevancy). */
public static final SortField FIELD_SCORE = new SortField (null, SCORE);
@@ -358,6 +367,26 @@
field = StringHelper.intern(field);
}
+ // When true, getComparator() sorts via per-document index values
+ // instead of the FieldCache.
+ private boolean useIndexValues;
+
+ /** If true, sorting uses per-document index values rather than
+ * the FieldCache. */
+ public void setUseIndexValues(boolean b) {
+ useIndexValues = b;
+ }
+
+ /** Returns true if sorting uses per-document index values. */
+ public boolean getUseIndexValues() {
+ return useIndexValues;
+ }
+
+ // Comparator used for BYTES sorting; defaults to UTF-8/Unicode order.
+ // NOTE(review): raw Comparator type -- presumably Comparator of BytesRef;
+ // confirm (generic parameters may have been lost in this patch text).
+ private Comparator bytesComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+
+ /** Sets the comparator used when sorting BYTES values. */
+ public void setBytesComparator(Comparator b) {
+ bytesComparator = b;
+ }
+
+ /** Returns the comparator used when sorting BYTES values. */
+ public Comparator getBytesComparator() {
+ return bytesComparator;
+ }
+
/** Returns the {@link FieldComparator} to use for
* sorting.
*
@@ -387,10 +416,18 @@
return new FieldComparator.DocComparator(numHits);
case SortField.INT:
- return new FieldComparator.IntComparator(numHits, field, parser);
+ if (useIndexValues) {
+ return new FieldComparator.IntIndexValuesComparator(numHits, field);
+ } else {
+ return new FieldComparator.IntComparator(numHits, field, parser);
+ }
case SortField.FLOAT:
- return new FieldComparator.FloatComparator(numHits, field, parser);
+ if (useIndexValues) {
+ return new FieldComparator.FloatIndexValuesComparator(numHits, field);
+ } else {
+ return new FieldComparator.FloatComparator(numHits, field, parser);
+ }
case SortField.LONG:
return new FieldComparator.LongComparator(numHits, field, parser);
Index: src/java/org/apache/lucene/util/BytesHash.java
===================================================================
--- src/java/org/apache/lucene/util/BytesHash.java (revision 0)
+++ src/java/org/apache/lucene/util/BytesHash.java (revision 0)
@@ -0,0 +1,377 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// nocommit -- move to util?
+import java.lang.reflect.Array;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.index.ByteBlockPool;
+
+/**
+ * Hashes BytesRefs. A BytesRef must be no longer than
+ * BYTES_BLOCK_SIZE - 2 bytes in
+ *
+ * NOTE: this class is meant only to be used internally
+ * by Lucene; it's only public so it can be shared across
+ * packages. This means the API is freely subject to
+ * change, and, the class could be removed entirely, in any
+ * Lucene release. Use directly at your own risk!
+ */
+
+// nocommit -- reuse Entry instances?
+public abstract class BytesHash {
+
+ // nocommit -- factor properly so the byte pool uses this
+ // NOT DW's
+ public final static int BYTES_BLOCK_SHIFT = 15;
+ public final static int BYTES_BLOCK_SIZE = 1 << BYTES_BLOCK_SHIFT;
+ public final static int BYTES_BLOCK_MASK = BYTES_BLOCK_SIZE - 1;
+
+ // nocommit -- reuse?
+ private static class ByteBlockAllocator extends ByteBlockPool.Allocator {
+ int blockUsedCount;
+
+ @Override
+ public byte[] getByteBlock() {
+ blockUsedCount++;
+ return new byte[BYTES_BLOCK_SIZE];
+ }
+
+ @Override
+ public void recycleByteBlocks(byte[][] blocks, int start, int end) {
+ blockUsedCount -= end-start;
+ }
+
+ public long ramBytesUsed() {
+ return blockUsedCount * BYTES_BLOCK_SIZE;
+ }
+
+ @Override
+ public void recycleByteBlocks(List blocks) {
+ blockUsedCount -= blocks.size();
+ }
+ }
+
+ public static class Entry {
+ public int bytesStart;
+ }
+
+ private final Class cl;
+ public final ByteBlockPool pool;
+ private int hashSize = 4;
+ private int hashHalfSize = hashSize/2;
+ private int hashMask = hashSize-1;
+ private int count;
+ private int lastCount = -1;
+ private final ByteBlockAllocator allocator;
+ private T[] hash;
+
+ @SuppressWarnings("unchecked")
+ public BytesHash(Class cl) {
+ this.cl = cl;
+ allocator = new ByteBlockAllocator();
+ pool = new ByteBlockPool(allocator);
+ hash = (T[]) Array.newInstance(cl, hashSize);
+ }
+
+ public int size() {
+ return count;
+ }
+
+ public BytesRef getBytes(T e) {
+ return deref(e.bytesStart, scratch1);
+ }
+
+ /** Destructive operation -- returns all Entry instances,
+ * in arbitrary order */
+ public T[] compact() {
+ int upto = 0;
+ for(int i=0;icomp) {
+ compact();
+ quickSort(comp, hash, 0, count-1);
+ return hash;
+ }
+
+ // In-place recursive quicksort of entries[lo..hi] using the given
+ // comparator; the pivot is the median of lo/mid/hi, which guards
+ // against worst-case behavior on already-sorted input.
+ void quickSort(Comparator comp, T[] entries, int lo, int hi) {
+ if (lo >= hi)
+ return;
+ else if (hi == 1+lo) {
+ // exactly two elements: swap if out of order
+ if (compare(comp, entries[lo], entries[hi]) > 0) {
+ final T tmp = entries[lo];
+ entries[lo] = entries[hi];
+ entries[hi] = tmp;
+ }
+ return;
+ }
+ // sort entries[lo], entries[mid], entries[hi] so the median lands at mid
+ int mid = (lo + hi) >>> 1;
+ if (compare(comp, entries[lo], entries[mid]) > 0) {
+ T tmp = entries[lo];
+ entries[lo] = entries[mid];
+ entries[mid] = tmp;
+ }
+
+ if (compare(comp, entries[mid], entries[hi]) > 0) {
+ T tmp = entries[mid];
+ entries[mid] = entries[hi];
+ entries[hi] = tmp;
+
+ if (compare(comp, entries[lo], entries[mid]) > 0) {
+ T tmp2 = entries[lo];
+ entries[lo] = entries[mid];
+ entries[mid] = tmp2;
+ }
+ }
+
+ int left = lo + 1;
+ int right = hi - 1;
+
+ if (left >= right)
+ return;
+
+ T partition = entries[mid];
+
+ // partition: move elements <= pivot left of elements > pivot
+ for (; ;) {
+ while (compare(comp, entries[right], partition) > 0)
+ --right;
+
+ while (left < right && compare(comp, entries[left], partition) <= 0)
+ ++left;
+
+ if (left < right) {
+ T tmp = entries[left];
+ entries[left] = entries[right];
+ entries[right] = tmp;
+ --right;
+ } else {
+ break;
+ }
+ }
+
+ // recurse into both halves around the partition point
+ quickSort(comp, entries, lo, left);
+ quickSort(comp, entries, left + 1, hi);
+ }
+
+ private final BytesRef scratch1 = new BytesRef();
+ private final BytesRef scratch2 = new BytesRef();
+
+ private final BytesRef deref(int bytesStart, BytesRef b) {
+ b.bytes = pool.buffers[bytesStart >> BYTES_BLOCK_SHIFT];
+ int pos = bytesStart & BYTES_BLOCK_MASK;
+
+ if ((b.bytes[pos] & 0x80) == 0) {
+ // length is 1 byte
+ b.length = b.bytes[pos];
+ pos += 1;
+ } else {
+ // length is 2 bytes
+ b.length = (b.bytes[pos]&0x7f) + ((b.bytes[pos+1]&0xff)<<7);
+ pos += 2;
+ }
+ b.offset = pos;
+ return b;
+ }
+
+ private boolean equals(T e, BytesRef b) {
+ return deref(e.bytesStart, scratch1).bytesEquals(b);
+ }
+
+ private int compare(Comparator comp, T e1, T e2) {
+ return comp.compare(deref(e1.bytesStart, scratch1),
+ deref(e2.bytesStart, scratch2));
+ }
+
+ @SuppressWarnings("unchecked")
+ private boolean shrink(int targetSize) {
+
+ // Cannot use ArrayUtil.shrink because we require power
+ // of 2:
+ int newSize = hashSize;
+ while(newSize >= 8 && newSize/4 > targetSize) {
+ newSize /= 2;
+ }
+
+ if (newSize != hashSize) {
+ hashSize = newSize;
+ hash = (T[]) Array.newInstance(cl, hashSize);
+ hashHalfSize = newSize/2;
+ hashMask = newSize-1;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void clear() {
+ lastCount = count;
+ count = 0;
+ if (lastCount != -1) {
+ if (shrink(lastCount)) {
+ // shrink clears the hash entries
+ return;
+ }
+ }
+ Arrays.fill(hash, null);
+ }
+
+ public T add(BytesRef bytes) {
+ int code = 0;
+ final int end = bytes.offset + bytes.length;
+ // build hash
+ for(int i=bytes.offset;i>8)+code)|1;
+ do {
+ code += inc;
+ hashPos = code & hashMask;
+ e = hash[hashPos];
+ } while (e != null && !equals(e, bytes));
+ }
+
+ if (e == null) {
+ // new entry
+ final int len2 = 2+bytes.length;
+ if (len2 + pool.byteUpto > BYTES_BLOCK_SIZE) {
+ if (len2 > BYTES_BLOCK_SIZE) {
+ throw new IllegalArgumentException("bytes can be at most " + (BYTES_BLOCK_SIZE-2) + " in length; got " + bytes.length);
+ }
+ pool.nextBuffer();
+ }
+
+ e = newEntry();
+
+ final byte[] buffer = pool.buffer;
+ final int bufferUpto = pool.byteUpto;
+ e.bytesStart = bufferUpto + pool.byteOffset;
+
+ // We first encode the length, followed by the
+ // bytes. Length is encoded as vInt, but will consume
+ // 1 or 2 bytes at most (we reject too-long terms,
+ // above).
+ if (bytes.length < 128) {
+ // 1 byte to store length
+ buffer[bufferUpto] = (byte) bytes.length;
+ pool.byteUpto += bytes.length + 1;
+ System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto+1, bytes.length);
+ } else {
+ // 2 byte to store length
+ buffer[bufferUpto] = (byte) (0x80 | (bytes.length & 0x7f));
+ buffer[bufferUpto+1] = (byte) ((bytes.length>>7) & 0xff);
+ pool.byteUpto += bytes.length + 2;
+ System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto+2, bytes.length);
+ }
+ assert hash[hashPos] == null;
+ hash[hashPos] = e;
+ count++;
+
+ if (count == hashHalfSize) {
+ rehash(2*hashSize);
+ }
+ }
+ return e;
+ }
+
+ /** Called when postings hash is too small (> 50%
+ * occupied) or too large (< 20% occupied). */
+ void rehash(final int newSize) {
+
+ final int newMask = newSize-1;
+
+ @SuppressWarnings("unchecked")
+ T[] newHash = (T[]) Array.newInstance(cl, newSize);
+ for(int i=0;i> BYTES_BLOCK_SHIFT];
+ code = 0;
+
+ final int len;
+ int pos;
+ if ((bytes[start] & 0x80) == 0) {
+ // length is 1 byte
+ len = bytes[start];
+ pos = start+1;
+ } else {
+ len = (bytes[start]&0x7f) + ((bytes[start+1]&0xff)<<7);
+ pos = start+2;
+ }
+
+ final int endPos = pos+len;
+ while(pos < endPos) {
+ code = (code*31) + bytes[pos++];
+ }
+
+ int hashPos = code & newMask;
+ assert hashPos >= 0;
+ if (newHash[hashPos] != null) {
+ final int inc = ((code>>8)+code)|1;
+ do {
+ code += inc;
+ hashPos = code & newMask;
+ } while (newHash[hashPos] != null);
+ }
+ newHash[hashPos] = e0;
+ }
+ }
+
+ hashMask = newMask;
+ hash = newHash;
+ hashSize = newSize;
+ hashHalfSize = newSize >> 1;
+ }
+
+ protected abstract T newEntry();
+
+ public long ramBytesUsed() {
+ return allocator.ramBytesUsed() + RamUsageEstimator.NUM_BYTES_OBJ_REF * hashSize + count * bytesPerEntry();
+ }
+
+ protected long bytesPerEntry() {
+ return RamUsageEstimator.NUM_BYTES_OBJ_HEADER + RamUsageEstimator.NUM_BYTES_INT;
+ }
+}
Property changes on: src/java/org/apache/lucene/util/BytesHash.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/util/BytesRef.java
===================================================================
--- src/java/org/apache/lucene/util/BytesRef.java (revision 983076)
+++ src/java/org/apache/lucene/util/BytesRef.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.util.Comparator;
+import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
@@ -259,12 +260,13 @@
}
private final static Comparator utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
-
+
public static Comparator getUTF8SortedAsUnicodeComparator() {
return utf8SortedAsUnicodeSortOrder;
}
- private static class UTF8SortedAsUnicodeComparator implements Comparator {
+ @SuppressWarnings("serial") // serializable to work with contrib/remote
+ private static final class UTF8SortedAsUnicodeComparator implements Serializable, Comparator {
// Only singleton
private UTF8SortedAsUnicodeComparator() {};
Index: src/java/org/apache/lucene/util/ConsumesRAM.java
===================================================================
--- src/java/org/apache/lucene/util/ConsumesRAM.java (revision 0)
+++ src/java/org/apache/lucene/util/ConsumesRAM.java (revision 0)
@@ -0,0 +1,22 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface ConsumesRAM {
+ public long ramBytesUsed();
+}
Property changes on: src/java/org/apache/lucene/util/ConsumesRAM.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/java/org/apache/lucene/util/packed/Packed64.java
===================================================================
--- src/java/org/apache/lucene/util/packed/Packed64.java (revision 983076)
+++ src/java/org/apache/lucene/util/packed/Packed64.java (working copy)
@@ -182,7 +182,7 @@
final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
final int base = bitPos * FAC_BITPOS;
-
+ assert elementPos < blocks.length : "elementPos: " + elementPos + "; blocks.len: " + blocks.length;
return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
}
Index: src/java/org/apache/lucene/util/packed/PackedInts.java
===================================================================
--- src/java/org/apache/lucene/util/packed/PackedInts.java (revision 983076)
+++ src/java/org/apache/lucene/util/packed/PackedInts.java (working copy)
@@ -77,6 +77,21 @@
/** Returns number of values */
int size();
}
+
+ /**
+ * Run-once, forward-only enum interface to decode and skip over
+ * previously saved {@link PackedInts}.
+ */
+ public static interface PackedIntEnum extends ReaderIterator {
+
+ /** Returns the current position of the enum */
+ int docID();
+
+ /** Skips to the given position in the enum and returns its value.
+ * @return the value at the given position
+ * @throws IOException if reading the value throws an IOException*/
+ long advance(int position) throws IOException;
+ }
/**
* A packed integer array that can be modified.
@@ -192,9 +207,13 @@
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
-
return new PackedReaderIterator(bitsPerValue, valueCount, in);
}
+
+ public static PackedIntEnum getEnum(IndexInput in ) throws IOException{
+ // ReaderIterator implements PackedIntEnum
+ return (PackedIntEnum)getReaderIterator(in);
+ }
/**
* Create a packed integer array with the given amount of values initialized
Index: src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
===================================================================
--- src/java/org/apache/lucene/util/packed/PackedReaderIterator.java (revision 983076)
+++ src/java/org/apache/lucene/util/packed/PackedReaderIterator.java (working copy)
@@ -21,12 +21,13 @@
import java.io.IOException;
-class PackedReaderIterator implements PackedInts.ReaderIterator {
+class PackedReaderIterator implements PackedInts.PackedIntEnum {
private long pending;
private int pendingBitsLeft;
private final IndexInput in;
private final int bitsPerValue;
private final int valueCount;
+ private int position = -1;
// masks[n-1] masks for bottom n bits
private final long[] masks;
@@ -57,16 +58,16 @@
}
public long next() throws IOException {
+
if (pendingBitsLeft == 0) {
pending = in.readLong();
pendingBitsLeft = 64;
}
-
+ final long result;
if (pendingBitsLeft >= bitsPerValue) {
// not split
- final long result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
+ result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
pendingBitsLeft -= bitsPerValue;
- return result;
} else {
// split
final int bits1 = bitsPerValue - pendingBitsLeft;
@@ -74,11 +75,37 @@
pending = in.readLong();
final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
- return result1 | result2;
+ result = result1 | result2;
}
+ ++position;
+ return result;
}
public void close() throws IOException {
in.close();
}
+
+ /** Returns the current position of the enum. */
+ public int docID() {
+ return position;
+ }
+
+ /** Skips forward to ord and returns the value stored there.
+ * Forward-only: asserts ord is past the current position. */
+ public long advance(final int ord) throws IOException{
+ assert ord < valueCount : "ord must be less than valueCount";
+ assert ord > position : "ord must be greater than the current position";
+ // number of whole values to step over before next() decodes the target
+ final int posToSkip = ord - 1 - position;
+ final long bitsToSkip = ((long)bitsPerValue * (long)posToSkip);
+ if(bitsToSkip < pendingBitsLeft ){
+ // target still lies inside the currently buffered long
+ pendingBitsLeft -= bitsToSkip;
+ }else {
+ // consume the buffered bits, then seek over whole 8-byte blocks
+ final long skip = bitsToSkip- pendingBitsLeft;
+ // (skip >> 6) full longs can be skipped; << 3 converts longs to bytes
+ final long filePointer = in.getFilePointer();
+ final long closestByte = (skip >> 6) << 3;
+ if(closestByte > 0)
+ in.seek(filePointer + closestByte);
+ pending = in.readLong();
+ // bits remaining in the freshly read long after the residual skip
+ pendingBitsLeft = 64 - (int)(skip % 64);
+ }
+ position = ord-1;
+ return next();
+ }
}
Index: src/test/org/apache/lucene/index/TestByteSlices.java
===================================================================
--- src/test/org/apache/lucene/index/TestByteSlices.java (revision 983076)
+++ src/test/org/apache/lucene/index/TestByteSlices.java (working copy)
@@ -26,7 +26,7 @@
/* Allocate another byte[] from the shared pool */
@Override
- synchronized byte[] getByteBlock() {
+ public synchronized byte[] getByteBlock() {
final int size = freeByteBlocks.size();
final byte[] b;
if (0 == size)
@@ -38,13 +38,13 @@
/* Return a byte[] to the pool */
@Override
- synchronized void recycleByteBlocks(byte[][] blocks, int start, int end) {
+ public synchronized void recycleByteBlocks(byte[][] blocks, int start, int end) {
for(int i=start;i<end;i++)
freeByteBlocks.add(blocks[i]);
}

/* Return a List<byte[]> to the pool */
@Override
- synchronized void recycleByteBlocks(List<byte[]> blocks) {
+ public synchronized void recycleByteBlocks(List<byte[]> blocks) {
final int size = blocks.size();
for(int i=0;i comp = mode == Bytes.Mode.SORTED ? BytesRef
+ .getUTF8SortedAsUnicodeComparator()
+ : null;
+
+ Directory dir = new MockRAMDirectory();
+ Writer w = Bytes
+ .getWriter(dir, "test", mode, comp, fixedSize);
+ int maxDoc = 220;
+ final String[] values = new String[maxDoc];
+ final int lenMin, lenMax;
+ if (fixedSize) {
+ lenMin = lenMax = 3 + rand.nextInt(7);
+ } else {
+ lenMin = 1;
+ lenMax = 15 + rand.nextInt(6);
+ }
+ for (int i = 0; i < 100; i++) {
+ final String s;
+ if (i > 0 && rand.nextInt(5) <= 2) {
+ // use prior value
+ s = values[2 * rand.nextInt(i)];
+ } else {
+ s = _TestUtil.randomUnicodeString(rand, lenMin, lenMax);
+ }
+ values[2 * i] = s;
+
+ UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef);
+ w.add(2 * i, bytesRef);
+ }
+ w.finish(maxDoc);
+
+ Reader r = Bytes.getReader(dir, "test", mode, fixedSize, maxDoc);
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum bytesEnum = r.getEnum();
+ assertNotNull("enum is null", bytesEnum);
+ ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class);
+ assertNotNull("attribute is null", attr);
+ BytesRef ref = attr.bytes();
+ assertNotNull("BytesRef is null - enum not initialized to use bytes", ref);
+
+ for (int i = 0; i < 2; i++) {
+ final int idx = 2 * i;
+ assertEquals("doc: " + idx, idx, bytesEnum.advance(idx));
+ assertEquals("doc: " + idx, values[idx], ref.utf8ToString());
+ }
+ assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
+ assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc+1));
+
+ bytesEnum.close();
+ }
+
+
+ // Verify we can load source twice:
+ for (int iter = 0; iter < 2; iter++) {
+ Source s;
+ Reader.SortedSource ss;
+ if (mode == Bytes.Mode.SORTED) {
+ s = ss = r.loadSorted(comp);
+ } else {
+ s = r.load();
+ ss = null;
+ }
+
+ for (int i = 0; i < 100; i++) {
+ final int idx = 2 * i;
+ assertNotNull("doc " + idx + "; value=" + values[idx], s.bytes(idx));
+ assertEquals("doc " + idx, values[idx], s.bytes(idx).utf8ToString());
+ if (ss != null) {
+ assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx))
+ .utf8ToString());
+ Reader.SortedSource.LookupResult result = ss.getByValue(new BytesRef(
+ values[idx]));
+ assertTrue(result.found);
+ assertEquals(ss.ord(idx), result.ord);
+ }
+ }
+
+ // Lookup random strings:
+ if (mode == Bytes.Mode.SORTED) {
+ final int numValues = ss.getValueCount();
+ for (int i = 0; i < 1000; i++) {
+ BytesRef bytesValue = new BytesRef(_TestUtil.randomUnicodeString(
+ rand, lenMin, lenMax));
+ SortedSource.LookupResult result = ss.getByValue(bytesValue);
+ if (result.found) {
+ assert result.ord > 0;
+ assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord)));
+ int count = 0;
+ for (int k = 0; k < 100; k++) {
+ if (bytesValue.utf8ToString().equals(values[2 * k])) {
+ assertEquals(ss.ord(2 * k), result.ord);
+ count++;
+ }
+ }
+ assertTrue(count > 0);
+ } else {
+ assert result.ord >= 0;
+ if (result.ord == 0) {
+ final BytesRef firstRef = ss.getByOrd(1);
+ // random string was before our first
+ assertTrue(firstRef.compareTo(bytesValue) > 0);
+ } else if (result.ord == numValues) {
+ final BytesRef lastRef = ss.getByOrd(numValues);
+ // random string was after our last
+ assertTrue(lastRef.compareTo(bytesValue) < 0);
+ } else {
+ // random string fell between two of our values
+ final BytesRef before = (BytesRef) ss.getByOrd(result.ord)
+ .clone();
+ final BytesRef after = ss.getByOrd(result.ord + 1);
+ assertTrue(before.compareTo(bytesValue) < 0);
+ assertTrue(bytesValue.compareTo(after) < 0);
+
+ }
+ }
+ }
+ }
+ }
+
+ r.close();
+ dir.close();
+ }
+
+ public void testInts() throws IOException {
+ final Random rand = newRandom();
+ long maxV = 1;
+ final int NUM_VALUES = 1000;
+ final long[] values = new long[NUM_VALUES];
+ for (int rx = 1; rx < 63; rx++, maxV *= 2) {
+ for (int b = 0; b < 2; b++) {
+ Directory dir = new MockRAMDirectory();
+ boolean useFixedArrays = b == 0;
+ Writer w = Ints.getWriter(dir, "test", useFixedArrays);
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = rand.nextLong() % (1 + maxV);
+ values[i] = v;
+ w.add(i, v);
+ }
+ final int additionalDocs = 1 + rand.nextInt(9);
+ w.finish(NUM_VALUES + additionalDocs);
+
+ Reader r = Ints.getReader(dir, "test", useFixedArrays);
+ for (int iter = 0; iter < 2; iter++) {
+ Source s = r.load();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = s.ints(i);
+ assertEquals("index " + i + " b: " + b, values[i], v);
+ }
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = r.getEnum();
+ ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
+ IntsRef ints = attr.ints();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(i, iEnum.nextDoc());
+ assertEquals(values[i], ints.get());
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(i, iEnum.nextDoc());
+ assertEquals("" + i, 0, ints.get());
+ }
+
+ iEnum.close();
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = r.getEnum();
+ ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
+ IntsRef ints = attr.ints();
+ for (int i = 0; i < NUM_VALUES; i += 1 + rand.nextInt(25)) {
+ assertEquals(i, iEnum.advance(i));
+ assertEquals(values[i], ints.get());
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(i, iEnum.advance(i));
+ assertEquals("" + i, 0, ints.get());
+ }
+
+ iEnum.close();
+ }
+ r.close();
+ dir.close();
+ }
+ }
+ }
+
+ public void testFloats4() throws IOException {
+ runTestFloats(newRandom(), 4, 0.00001);
+ }
+
+ private void runTestFloats(Random rand, int precision, double delta)
+ throws IOException {
+ Directory dir = new MockRAMDirectory();
+ Writer w = Floats.getWriter(dir, "test", precision);
+ final int NUM_VALUES = 1000;
+ final double[] values = new double[NUM_VALUES];
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final double v = precision == 4 ? rand.nextFloat() : rand.nextDouble();
+ values[i] = v;
+ w.add(i, v);
+ }
+ final int additionalValues = 1 + rand.nextInt(10);
+ w.finish(NUM_VALUES + additionalValues);
+
+ Reader r = Floats.getReader(dir, "test", NUM_VALUES
+ + additionalValues);
+ for (int iter = 0; iter < 2; iter++) {
+ Source s = r.load();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(values[i], s.floats(i));
+ }
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum fEnum = r.getEnum();
+ ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
+ FloatsRef floats = attr.floats();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(i, fEnum.nextDoc());
+ assertEquals(values[i], floats.get(), delta);
+ }
+ for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
+ assertEquals(i, fEnum.nextDoc());
+ assertEquals(0.0, floats.get(), delta);
+ }
+ fEnum.close();
+ }
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum fEnum = r.getEnum();
+ ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
+ FloatsRef floats = attr.floats();
+ for (int i = 0; i < NUM_VALUES; i += 1 + rand.nextInt(25)) {
+ assertEquals(i, fEnum.advance(i));
+ assertEquals(values[i], floats.get(), delta);
+ }
+ for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
+ assertEquals(i, fEnum.advance(i));
+ assertEquals(0.0, floats.get(), delta);
+ }
+ fEnum.close();
+ }
+
+ r.close();
+ dir.close();
+ }
+
+ public void testFloats8() throws IOException {
+ runTestFloats(newRandom(), 8, 0.0);
+ }
+
+ /**
+ * Tests complete indexing of {@link Values} including deletions, merging and
+ * sparse value fields on Compound-File
+ */
+ public void testCFSIndex() throws IOException {
+ final Random rand = newRandom();
+ // without deletions
+ IndexWriterConfig cfg = writerConfig(rand, true);
+ // primitives - no deletes
+ runTestNumerics(cfg, rand, false);
+
+ cfg = writerConfig(rand, true);
+ // bytes - no deletes
+ runTestIndexBytes(cfg, rand, false);
+
+ // with deletions
+ cfg = writerConfig(rand, true);
+ // primitives
+ runTestNumerics(cfg, rand, true);
+
+ cfg = writerConfig(rand, true);
+ // bytes
+ runTestIndexBytes(cfg, rand, true);
+ }
+
+ /**
+ * Tests complete indexing of {@link Values} including deletions, merging and
+ * sparse value fields on None-Compound-File
+ */
+ public void testIndex() throws IOException {
+ //
+ final Random rand = newRandom();
+ // without deletions
+ IndexWriterConfig cfg = writerConfig(rand, false);
+ // primitives - no deletes
+ runTestNumerics(cfg, rand, false);
+
+ cfg = writerConfig(rand, false);
+ // bytes - no deletes
+ runTestIndexBytes(cfg, rand, false);
+
+ // with deletions
+ cfg = writerConfig(rand, false);
+ // primitives
+ runTestNumerics(cfg, rand, true);
+
+ cfg = writerConfig(rand, false);
+ // bytes
+ runTestIndexBytes(cfg, rand, true);
+ }
+
+ private IndexWriterConfig writerConfig(Random rand, boolean useCompoundFile) {
+ final IndexWriterConfig cfg = newIndexWriterConfig(rand,
+ TEST_VERSION_CURRENT, new MockAnalyzer());
+ MergePolicy mergePolicy = cfg.getMergePolicy();
+ if(mergePolicy instanceof LogMergePolicy) {
+ ((LogMergePolicy)mergePolicy).setUseCompoundFile(useCompoundFile);
+ } else if(useCompoundFile) {
+ LogMergePolicy policy = new LogDocMergePolicy();
+ policy.setUseCompoundFile(useCompoundFile);
+ cfg.setMergePolicy(policy);
+ }
+ return cfg;
+ }
+
+ public void runTestNumerics(IndexWriterConfig cfg, Random rand,
+ boolean withDeletions) throws IOException {
+ Directory d = new MockRAMDirectory();
+ IndexWriter w = new IndexWriter(d, cfg);
+ final int numValues = 350;
+ final List<Values> numVariantList = new ArrayList<Values>(NUMERICS);
+
+ // run in random order to test if fill works correctly during merges
+ Collections.shuffle(numVariantList, rand);
+ for (Values val : numVariantList) {
+ OpenBitSet deleted = indexValues(rand, w, numValues, val, numVariantList,
+ withDeletions, 7);
+ List<Closeable> closeables = new ArrayList<Closeable>();
+ IndexReader r = w.getReader();
+ final int numRemainingValues = (int) (numValues - deleted.cardinality());
+ final int base = r.numDocs() - numRemainingValues;
+ switch (val) {
+ case PACKED_INTS:
+ case PACKED_INTS_FIXED: {
+ Reader intsReader = r.getIndexValues(val.name());
+ Source ints = intsReader.load();
+ ValuesEnum intsEnum = intsReader.getEnum();
+ assertNotNull(intsEnum);
+ IntsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
+ closeables.add(intsReader);
+ for (int i = 0; i < base; i++) {
+ assertEquals(0, ints.ints(i));
+ assertEquals(val.name() + " base: " + base + " index: " + i, i, rand.nextBoolean()?intsEnum.advance(i): intsEnum.nextDoc());
+ assertEquals(0, enumRef.get());
+ }
+ int expected = 0;
+ for (int i = base; i < r.numDocs(); i++, expected++) {
+ while (deleted.get(expected)) {
+ expected++;
+ }
+ assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs", i, intsEnum.advance(i));
+ assertEquals(expected, ints.ints(i));
+ assertEquals(expected, enumRef.get());
+
+ }
+ }
+ break;
+ case SIMPLE_FLOAT_4BYTE:
+ case SIMPLE_FLOAT_8BYTE: {
+ Reader floatReader = r.getIndexValues(val.name());
+ Source floats = floatReader.load();
+ ValuesEnum floatEnum = floatReader.getEnum();
+ assertNotNull(floatEnum);
+ FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class).floats();
+ closeables.add(floatReader);
+
+ for (int i = 0; i < base; i++) {
+ assertEquals(0.0d, floats.floats(i));
+ assertEquals(i, rand.nextBoolean()?floatEnum.advance(i): floatEnum.nextDoc());
+ assertEquals("index " + i, 0.0 ,enumRef.get());
+ }
+ int expected = 0;
+ for (int i = base; i < r.numDocs(); i++, expected++) {
+ while (deleted.get(expected)) {
+ expected++;
+ }
+ assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs base:" + base, i, floatEnum.advance(i));
+ assertEquals("index " + i, 2.0 * expected ,enumRef.get() , 0.00001);
+ assertEquals("index " + i, 2.0 * expected, floats.floats(i), 0.00001);
+ }
+ }
+ break;
+ default:
+ fail("unexpected value " + val);
+ }
+
+ closeables.add(r);
+ for (Closeable toClose : closeables) {
+ toClose.close();
+ }
+ }
+ w.close();
+ d.close();
+ }
+
+ private static EnumSet<Values> BYTES = EnumSet.of(
+ Values.BYTES_FIXED_DEREF,
+ Values.BYTES_FIXED_SORTED,
+ Values.BYTES_FIXED_STRAIGHT,
+ Values.BYTES_VAR_DEREF ,
+ Values.BYTES_VAR_SORTED,
+ Values.BYTES_VAR_STRAIGHT
+ );
+
+ private static EnumSet<Values> STRAIGHT_BYTES = EnumSet.of(
+ Values.BYTES_FIXED_STRAIGHT,
+ Values.BYTES_VAR_STRAIGHT
+ );
+
+ private static EnumSet<Values> NUMERICS = EnumSet.of(Values.PACKED_INTS, Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE);
+
+ private OpenBitSet indexValues(Random rand, IndexWriter w, int numValues,
+ Values value, List<Values> valueVarList, boolean withDeletions,
+ int multOfSeven) throws CorruptIndexException, IOException {
+ final boolean isNumeric = NUMERICS.contains(value);
+ OpenBitSet deleted = new OpenBitSet(numValues);
+ Document doc = new Document();
+ Fieldable fieldable = new AttributeField(value.name());
+ ValuesAttribute valuesAttribute = fieldable.attributes().addAttribute(ValuesAttribute.class);
+ valuesAttribute.setType(value);
+ doc.add(fieldable);
+
+ final IntsRef intsRef = valuesAttribute.ints();
+ final FloatsRef floatsRef = valuesAttribute.floats();
+ final BytesRef bytesRef = valuesAttribute.bytes();
+
+ final String idBase = value.name() + "_";
+ final byte[] b = new byte[multOfSeven];
+ if (bytesRef != null) {
+ bytesRef.bytes = b;
+ bytesRef.length = b.length;
+ bytesRef.offset = 0;
+ }
+
+ byte upto = 0;
+ for (int i = 0; i < numValues; i++) {
+ if (isNumeric) {
+ switch (value) {
+ case PACKED_INTS:
+ case PACKED_INTS_FIXED:
+ intsRef.set(i);
+ break;
+ case SIMPLE_FLOAT_4BYTE:
+ case SIMPLE_FLOAT_8BYTE:
+ floatsRef.set(2.0f * i);
+ break;
+ default:
+ fail("unexpected value " + value);
+ }
+ } else {
+ for (int j = 0; j < b.length; j++) {
+ b[j] = upto++;
+ }
+ }
+ doc.removeFields("id");
+ doc.add(new Field("id", idBase + i, Store.YES,
+ Index.NOT_ANALYZED_NO_NORMS));
+ w.addDocument(doc);
+
+ if (i % 7 == 0) {
+ if (withDeletions && rand.nextBoolean()) {
+ Values val = valueVarList.get(rand.nextInt(1 + valueVarList
+ .indexOf(value)));
+ final int randInt = val == value ? rand.nextInt(1 + i) : rand
+ .nextInt(numValues);
+ w.deleteDocuments(new Term("id", val.name() + "_" + randInt));
+ if (val == value) {
+ deleted.set(randInt);
+ }
+ }
+ w.commit();
+
+ }
+ }
+ w.commit();
+
+ // nocommit test unoptimized with deletions
+ if(withDeletions || rand.nextBoolean())
+ w.optimize();
+ return deleted;
+ }
+
+ public void runTestIndexBytes(IndexWriterConfig cfg, Random rand,
+ boolean withDeletions) throws CorruptIndexException,
+ LockObtainFailedException, IOException {
+ Directory d = new MockRAMDirectory();
+ IndexWriter w = new IndexWriter(d, cfg);
+ final List<Values> byteVariantList = new ArrayList<Values>(BYTES);
+
+ // run in random order to test if fill works correctly during merges
+ Collections.shuffle(byteVariantList, rand);
+ final int numValues = 350;
+ for (Values byteIndexValue : byteVariantList) {
+ int bytesSize = 7 + rand.nextInt(128);
+ OpenBitSet deleted = indexValues(rand, w, numValues, byteIndexValue,
+ byteVariantList, withDeletions, bytesSize);
+ final IndexReader r = w.getReader();
+ assertEquals(0, r.numDeletedDocs());
+ List<Closeable> closeables = new ArrayList<Closeable>();
+ final int numRemainingValues = (int) (numValues - deleted.cardinality());
+ final int base = r.numDocs() - numRemainingValues;
+
+ Reader bytesReader = r.getIndexValues(byteIndexValue.name());
+ closeables.add(bytesReader);
+ assertNotNull("field " + byteIndexValue.name()
+ + " returned null reader - maybe merged failed", bytesReader);
+ Source bytes = bytesReader.load();
+ ValuesEnum bytesEnum = bytesReader.getEnum();
+ assertNotNull(bytesEnum);
+ final ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class);
+ byte upto = 0;
+ // test the filled up slots for correctness
+ for (int i = 0; i < base; i++) {
+ BytesRef br = bytes.bytes(i);
+ String msg = " field: " + byteIndexValue.name() + " at index: " + i
+ + " base: " + base + " numDocs:" + r.numDocs();
+ switch (byteIndexValue) {
+ case BYTES_VAR_STRAIGHT:
+ case BYTES_FIXED_STRAIGHT:
+ assertEquals(i, bytesEnum.advance(i));
+ // fixed straight returns bytesref with zero bytes all of fixed
+ // length
+ assertNotNull("expected none null - " + msg, br);
+ if(br.length != 0) {
+ assertEquals("expected zero bytes of length " + bytesSize + " - "
+ + msg, bytesSize, br.length);
+ for (int j = 0; j < br.length; j++) {
+ assertEquals("Byte at index " + j + " doesn't match - " + msg, 0,
+ br.bytes[br.offset + j]);
+ }
+ }
+ break;
+ case BYTES_VAR_SORTED:
+ case BYTES_FIXED_SORTED:
+ case BYTES_VAR_DEREF:
+ case BYTES_FIXED_DEREF:
+ default:
+ assertNotNull("expected none null - " + msg, br);
+ assertEquals("expected empty bytes -" + msg, 0, br.length);
+ }
+ }
+ final BytesRef enumRef = attr.bytes();
+
+
+ // test the actual doc values added in this iteration
+ assertEquals(base + numRemainingValues, r.numDocs());
+ int v = 0;
+ for (int i = base; i < r.numDocs(); i++) {
+
+ String msg = " field: " + byteIndexValue.name() + " at index: " + i
+ + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + bytesSize;
+ while (withDeletions && deleted.get(v++)) {
+ upto += bytesSize;
+ }
+
+ BytesRef br = bytes.bytes(i);
+ if(bytesEnum.docID() != i)
+ assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum.advance(i));
+ for (int j = 0; j < br.length; j++, upto++) {
+ assertEquals("EnumRef Byte at index " + j + " doesn't match - " + msg,
+ upto, enumRef.bytes[enumRef.offset + j]);
+ assertEquals("SourceRef Byte at index " + j + " doesn't match - " + msg,
+ upto, br.bytes[br.offset + j]);
+ }
+ }
+
+ // clean up
+ closeables.add(r);
+ for (Closeable toClose : closeables) {
+ toClose.close();
+ }
+ }
+ w.close();
+ d.close();
+ }
+
+}
Property changes on: src/test/org/apache/lucene/index/values/TestIndexValues.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: src/test/org/apache/lucene/store/MockRAMDirectory.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMDirectory.java (revision 983076)
+++ src/test/org/apache/lucene/store/MockRAMDirectory.java (working copy)
@@ -19,9 +19,13 @@
import java.io.IOException;
import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
+import java.util.List;
import java.util.Random;
import java.util.Map;
import java.util.HashMap;
@@ -53,6 +57,8 @@
// member initialization vs when it calls super. It seems
// like super is called, then our members are initialized:
Map<String,Integer> openFiles;
+ Set<MockRAMInputStream> openInputStreams = new HashSet<MockRAMInputStream>();
+
private synchronized void init() {
if (openFiles == null)
@@ -229,7 +235,8 @@
fileMap.put(name, file);
}
- return new MockRAMOutputStream(this, file, name);
+ MockRAMOutputStream stream = new MockRAMOutputStream(this, file, name);
+ return stream;
}
@Override
@@ -246,7 +253,9 @@
openFiles.put(name, Integer.valueOf(1));
}
}
- return new MockRAMInputStream(this, name, file);
+ MockRAMInputStream stream = new MockRAMInputStream(this, name, file);
+ openInputStreams.add(stream);
+ return stream;
}
/** Provided for testing purposes. Use sizeInBytes() instead. */
@@ -279,7 +288,14 @@
if (noDeleteOpenFile && openFiles.size() > 0) {
// RuntimeException instead of IOException because
// super() does not throw IOException currently:
- throw new RuntimeException("MockRAMDirectory: cannot close: there are still open files: " + openFiles);
+ Set<MockRAMInputStream> streams = this.openInputStreams;
+ StringWriter sw = new StringWriter();
+ PrintWriter pw = new PrintWriter(sw);
+ for (MockRAMInputStream stream : streams) {
+ stream.ex.printStackTrace(pw);
+ pw.println();
+ }
+ throw new RuntimeException("MockRAMDirectory: cannot close: there are still open files: " + openFiles +" opened at: " + sw.toString() );
}
}
Index: src/test/org/apache/lucene/store/MockRAMInputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMInputStream.java (revision 983076)
+++ src/test/org/apache/lucene/store/MockRAMInputStream.java (working copy)
@@ -28,6 +28,7 @@
private MockRAMDirectory dir;
private String name;
private boolean isClone;
+ Exception ex;
/** Construct an empty output buffer.
* @throws IOException */
@@ -35,6 +36,8 @@
super(f);
this.name = name;
this.dir = dir;
+ // store the stacktrace
+ ex = new Exception();
}
@Override
@@ -45,6 +48,7 @@
// all clones get closed:
if (!isClone) {
synchronized(dir) {
+ dir.openInputStreams.remove(this);
Integer v = dir.openFiles.get(name);
// Could be null when MockRAMDirectory.crash() was called
if (v != null) {
Index: src/test/org/apache/lucene/store/MockRAMOutputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMOutputStream.java (revision 983076)
+++ src/test/org/apache/lucene/store/MockRAMOutputStream.java (working copy)
@@ -30,6 +30,8 @@
private MockRAMDirectory dir;
private boolean first=true;
private final String name;
+ //TODO(simonw): finish this
+ final Exception ex;
byte[] singleByte = new byte[1];
@@ -38,6 +40,8 @@
super(f);
this.dir = dir;
this.name = name;
+ // save the stacktrace
+ ex = new Exception();
}
@Override
Index: src/test/org/apache/lucene/util/_TestUtil.java
===================================================================
--- src/test/org/apache/lucene/util/_TestUtil.java (revision 983076)
+++ src/test/org/apache/lucene/util/_TestUtil.java (working copy)
@@ -116,6 +116,37 @@
}
return new String(buffer, 0, end);
}
+
+ public static String randomUnicodeString(Random r, int minLength, int maxLength) {
+ if(minLength > maxLength)
+ throw new IllegalArgumentException("minLength must be <= maxLength");
+ final boolean lenEqual = minLength==maxLength;
+ final int end = lenEqual?minLength:minLength + r.nextInt(maxLength-minLength+1);
+ if (end == 0) {
+ // allow 0 length
+ return "";
+ }
+
+ // TODO(simonw): check this
+ final int fixedPlane = 5;//minLength % 5;
+ final char[] buffer = new char[end];
+ for (int i = 0; i < end; i++) {
+ int t = lenEqual? fixedPlane: r.nextInt(5);
+ //buffer[i] = (char) (97 + r.nextInt(26));
+ if (0 == t && i < end - 1 && !lenEqual) {
+ // Make a surrogate pair
+ // High surrogate
+ buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff);
+ // Low surrogate
+ buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff);
+ }
+ else if (t <= 1) buffer[i] = (char) r.nextInt(0x80);
+ else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800);
+ else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff);
+ else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff);
+ }
+ return new String(buffer, 0, end);
+ }
private static final int[] blockStarts = {
0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400,
Index: src/test/org/apache/lucene/util/packed/PackedReaderIteratorTest.java
===================================================================
--- src/test/org/apache/lucene/util/packed/PackedReaderIteratorTest.java (revision 0)
+++ src/test/org/apache/lucene/util/packed/PackedReaderIteratorTest.java (revision 0)
@@ -0,0 +1,77 @@
+package org.apache.lucene.util.packed;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.MockRAMDirectory;
+import org.apache.lucene.util.LuceneTestCaseJ4;
+import org.junit.Test;
+import static org.junit.Assert.*;
+/**
+ *
+ *
+ */
+public class PackedReaderIteratorTest extends LuceneTestCaseJ4 {
+
+ @Test
+ public void testNext() throws IOException {
+ Random rnd = newRandom();
+ for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
+ long ceil = 2;
+ for(int nbits=1;nbits<63;nbits++) {
+ final int valueCount = 100+rnd.nextInt(500);
+ final Directory d = new MockRAMDirectory();
+
+ IndexOutput out = d.createOutput("out.bin");
+ PackedInts.Writer w = PackedInts.getWriter(
+ out, valueCount, nbits);
+
+ final long[] values = new long[valueCount];
+ for(int i=0;i