Index: lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (revision 1534125)
+++ lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (working copy)
@@ -186,6 +186,49 @@
     directory.close();
   }
 
+  /**
+   * Indexes one document with two binary doc values fields ("dv1" and "dv2") holding
+   * different bytes, then checks that each field hands back its own value.
+   */
+  public void testTwoBinaryValues() throws IOException {
+    Directory directory = newDirectory();
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
+    Document doc = new Document();
+    String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
+    String text = "This is the text to be indexed. " + longTerm;
+    doc.add(newTextField("fieldname", text, Field.Store.YES));
+    doc.add(new BinaryDocValuesField("dv1", new BytesRef(longTerm)));
+    doc.add(new BinaryDocValuesField("dv2", new BytesRef(text)));
+    iwriter.addDocument(doc);
+    iwriter.close();
+
+    // Now search the index:
+    IndexReader ireader = DirectoryReader.open(directory); // read-only=true
+    IndexSearcher isearcher = new IndexSearcher(ireader);
+
+    assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
+    Query query = new TermQuery(new Term("fieldname", "text"));
+    TopDocs hits = isearcher.search(query, null, 1);
+    assertEquals(1, hits.totalHits);
+    // Iterate through the results:
+    for (int i = 0; i < hits.scoreDocs.length; i++) {
+      StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
+      assertEquals(text, hitDoc.get("fieldname"));
+      // JUnit assertion instead of the assert keyword, so the check also runs without -ea.
+      assertEquals(1, ireader.leaves().size());
+      BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv1");
+      BytesRef scratch = new BytesRef();
+      dv.get(hits.scoreDocs[i].doc, scratch);
+      assertEquals(new BytesRef(longTerm), scratch);
+      dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
+      dv.get(hits.scoreDocs[i].doc, scratch);
+      assertEquals(new BytesRef(text), scratch);
+    }
+
+    ireader.close();
+    directory.close();
+  }
+
   public void testTwoFieldsMixed() throws IOException {
     Directory directory = newDirectory();
     RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
@@ -2926,12 +2969,12 @@
           if (values.length > 0) {
             assertNotNull(sortedSet);
             sortedSet.setDocument(j);
-            for (int i = 0; i < values.length; i++) {
+            for (int k = 0; k < values.length; k++) {
               long ord = sortedSet.nextOrd();
               assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
               BytesRef value = new BytesRef();
               sortedSet.lookupOrd(ord, value);
-              assertEquals(values[i], value.utf8ToString());
+              assertEquals(values[k], value.utf8ToString());
             }
             assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
             assertTrue(sortedSetBits.get(j));
Index: lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectDocValuesFormat.java
===================================================================
--- lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectDocValuesFormat.java (revision 0)
+++ lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectDocValuesFormat.java (working copy)
@@ -0,0 +1,34 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseDocValuesFormatTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Runs the {@link BaseDocValuesFormatTestCase} suite with a codec built from {@link DirectDocValuesFormat}.
+ */
+public class TestDirectDocValuesFormat extends BaseDocValuesFormatTestCase {
+  private final Codec directCodec = _TestUtil.alwaysDocValuesFormat(new DirectDocValuesFormat());
+
+  @Override
+  protected Codec getCodec() {
+    return directCodec;
+  }
+}

Property changes on: lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectDocValuesFormat.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (revision 0)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (working copy)
@@ -0,0 +1,478 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * Reader for {@link DirectDocValuesFormat} + */ + +class DirectDocValuesProducer extends DocValuesProducer { + // metadata maps (just file pointers and minimal stuff) + private final Map numerics = new HashMap(); + private final Map binaries = new HashMap(); + private final Map sorteds = new HashMap(); + private final Map sortedSets = new HashMap(); + private final IndexInput data; + + // ram instances we have already loaded + private final Map numericInstances = + new HashMap(); + private final Map binaryInstances = + new HashMap(); + private final Map sortedInstances = + new HashMap(); + private final Map sortedSetInstances = + new HashMap(); + private final Map docsWithFieldInstances = new HashMap(); + + private final 
int maxDoc; + + static final byte NUMBER = 0; + static final byte BYTES = 1; + static final byte SORTED = 2; + static final byte SORTED_SET = 3; + + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; + + DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { + maxDoc = state.segmentInfo.getDocCount(); + String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + // read in the entries from the metadata file. + IndexInput in = state.directory.openInput(metaName, state.context); + boolean success = false; + final int version; + try { + version = CodecUtil.checkHeader(in, metaCodec, + VERSION_START, + VERSION_CURRENT); + readFields(in); + + success = true; + } finally { + if (success) { + IOUtils.close(in); + } else { + IOUtils.closeWhileHandlingException(in); + } + } + + success = false; + try { + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); + data = state.directory.openInput(dataName, state.context); + final int version2 = CodecUtil.checkHeader(data, dataCodec, + VERSION_START, + VERSION_CURRENT); + if (version != version2) { + throw new CorruptIndexException("Format versions mismatch"); + } + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this.data); + } + } + } + + private NumericEntry readNumericEntry(IndexInput meta) throws IOException { + NumericEntry entry = new NumericEntry(); + entry.offset = meta.readLong(); + entry.count = meta.readInt(); + entry.missingOffset = meta.readLong(); + if (entry.missingOffset != -1) { + entry.missingBytes = meta.readLong(); + } else { + entry.missingBytes = 0; + } + entry.byteWidth = meta.readByte(); + + return entry; + } + + private BinaryEntry readBinaryEntry(IndexInput meta) throws IOException { + BinaryEntry entry = new BinaryEntry(); + 
entry.offset = meta.readLong(); + entry.numBytes = meta.readInt(); + entry.count = meta.readInt(); + entry.missingOffset = meta.readLong(); + if (entry.missingOffset != -1) { + entry.missingBytes = meta.readLong(); + } else { + entry.missingBytes = 0; + } + + return entry; + } + + private SortedEntry readSortedEntry(IndexInput meta) throws IOException { + SortedEntry entry = new SortedEntry(); + entry.docToOrd = readNumericEntry(meta); + entry.values = readBinaryEntry(meta); + return entry; + } + + private SortedSetEntry readSortedSetEntry(IndexInput meta) throws IOException { + SortedSetEntry entry = new SortedSetEntry(); + entry.docToOrdAddress = readNumericEntry(meta); + entry.ords = readNumericEntry(meta); + entry.values = readBinaryEntry(meta); + return entry; + } + + private void readFields(IndexInput meta) throws IOException { + int fieldNumber = meta.readVInt(); + while (fieldNumber != -1) { + int fieldType = meta.readByte(); + if (fieldType == NUMBER) { + numerics.put(fieldNumber, readNumericEntry(meta)); + } else if (fieldType == BYTES) { + binaries.put(fieldNumber, readBinaryEntry(meta)); + } else if (fieldType == SORTED) { + sorteds.put(fieldNumber, readSortedEntry(meta)); + } else if (fieldType == SORTED_SET) { + sortedSets.put(fieldNumber, readSortedSetEntry(meta)); + } else { + throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta); + } + fieldNumber = meta.readVInt(); + } + } + + @Override + public long ramBytesUsed() { + // TODO: optimize me + return RamUsageEstimator.sizeOf(this); + } + + @Override + public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException { + NumericDocValues instance = numericInstances.get(field.number); + if (instance == null) { + // Lazy load + instance = loadNumeric(numerics.get(field.number)); + numericInstances.put(field.number, instance); + } + return instance; + } + + private NumericDocValues loadNumeric(NumericEntry entry) throws IOException { + 
data.seek(entry.offset + entry.missingBytes); + switch (entry.byteWidth) { + case 1: + { + final byte[] values = new byte[entry.count]; + for(int i=0;i> 3]; + for (int i = 0; i < bits.length; i++) { + bits[i] = data.readLong(); + } + instance = new FixedBitSet(bits, maxDoc); + docsWithFieldInstances.put(fieldNumber, instance); + } + } + return instance; + } + } + + @Override + public Bits getDocsWithField(FieldInfo field) throws IOException { + switch(field.getDocValuesType()) { + case SORTED_SET: + return new SortedSetDocsWithField(getSortedSet(field), maxDoc); + case SORTED: + return new SortedDocsWithField(getSorted(field), maxDoc); + case BINARY: + BinaryEntry be = binaries.get(field.number); + return getMissingBits(field.number, be.missingOffset, be.missingBytes); + case NUMERIC: + NumericEntry ne = numerics.get(field.number); + return getMissingBits(field.number, ne.missingOffset, ne.missingBytes); + default: + throw new AssertionError(); + } + } + + @Override + public void close() throws IOException { + data.close(); + } + + static class SortedSetRawValues { + NumericDocValues docToOrdAddress; + NumericDocValues ords; + BinaryDocValues values; + } + + static class NumericEntry { + long offset; + int count; + long missingOffset; + long missingBytes; + byte byteWidth; + int packedIntsVersion; + } + + static class BinaryEntry { + long offset; + long missingOffset; + long missingBytes; + int count; + int numBytes; + int minLength; + int maxLength; + int packedIntsVersion; + int blockSize; + } + + static class SortedEntry { + NumericEntry docToOrd; + BinaryEntry values; + } + + static class SortedSetEntry { + NumericEntry docToOrdAddress; + NumericEntry ords; + BinaryEntry values; + } + + static class FSTEntry { + long offset; + long numOrds; + } +} Property changes on: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ 
No newline at end of property
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesFormat.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesFormat.java (revision 0)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesFormat.java (working copy)
@@ -0,0 +1,87 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+/**
+ * In-memory docvalues format that does no (or very little)
+ * compression.  Indexed values are stored on disk, but
+ * then at search time all values are loaded into memory as
+ * simple java arrays.  For numeric values, it uses
+ * byte[], short[], int[], long[] as necessary to fit the
+ * range of the values.  For binary values, there is an int
+ * (4 bytes) overhead per value.
+ *
+ * <p>Limitations:
+ * <ul>
+ *   <li>For binary and sorted fields the total space
+ *       required for all binary values cannot exceed about
+ *       2.1 GB (see {@link #MAX_TOTAL_BYTES_LENGTH}).</li>
+ *
+ *   <li>For sorted set fields, the sum of the size of each
+ *       document's set of values cannot exceed about 2.1
+ *       billion (see {@link #MAX_SORTED_SET_ORDS}).</li>
+ * </ul>
+ */
+public class DirectDocValuesFormat extends DocValuesFormat {
+
+  /** The sum of all byte lengths for a binary field, or for
+   *  the unique values in sorted or sorted set fields, cannot
+   *  exceed this. */
+  public static final int MAX_TOTAL_BYTES_LENGTH = Integer.MAX_VALUE - 200;
+
+  /** The sum of the number of values across all documents
+   *  in a sorted set field cannot exceed this. */
+  public static final int MAX_SORTED_SET_ORDS = Integer.MAX_VALUE - 200;
+
+  /** Sole constructor. */
+  public DirectDocValuesFormat() {
+    super("Direct");
+  }
+
+  /**
+   * Creates the writer for a segment, handing it this format's data and
+   * metadata codec names and file extensions.
+   */
+  @Override
+  public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    return new DirectDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+  }
+
+  /**
+   * Creates the reader for a segment, handing it the same codec names and
+   * file extensions that {@link #fieldsConsumer} passes to the writer.
+   */
+  @Override
+  public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
+    return new DirectDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+  }
+
+  // Codec header names and file extensions; the consumer and the producer receive the same four.
+  static final String DATA_CODEC = "DirectDocValuesData";
+  static final String DATA_EXTENSION = "dvdd";
+  static final String METADATA_CODEC = "DirectDocValuesMetadata";
+  static final String METADATA_EXTENSION = "dvdm";
+}

Property changes on: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesFormat.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java (revision 0)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java (working copy)
@@ -0,0 +1,302 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.VERSION_CURRENT;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.BYTES;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER;
+
+/**
+ * Writer for {@link DirectDocValuesFormat}: values are appended at a fixed width to the data
+ * file, while each field's type, offsets, counts and widths go to the metadata file.
+ */
+class DirectDocValuesConsumer extends DocValuesConsumer {
+  final IndexOutput data, meta;
+  final int maxDoc;
+
+  /** Creates the data and metadata outputs and writes their codec headers; closes them on failure. */
+  DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
+    maxDoc = state.segmentInfo.getDocCount();
+    boolean success = false;
+    try {
+      String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+      data = state.directory.createOutput(dataName, state.context);
+      CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
+      String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+      meta = state.directory.createOutput(metaName, state.context);
+      CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this);
+      }
+    }
+  }
+
+  @Override
+  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+    meta.writeVInt(field.number);
+    meta.writeByte(NUMBER);
+    addNumericFieldValues(field, values);
+  }
+
+  /**
+   * Writes one numeric entry. Metadata gets the data offset, the value count, the missing
+   * bitset location (or -1 when no value is null) and the byte width; data gets the optional
+   * bitset, then every value at that width. Throws IllegalArgumentException once the count
+   * reaches {@link DirectDocValuesFormat#MAX_SORTED_SET_ORDS}.
+   */
+  private void addNumericFieldValues(FieldInfo field, Iterable<Number> values) throws IOException {
+    meta.writeLong(data.getFilePointer());
+    long minValue = Long.MAX_VALUE;
+    long maxValue = Long.MIN_VALUE;
+    boolean missing = false;
+
+    long count = 0;
+    for (Number nv : values) {
+      if (nv != null) {
+        long v = nv.longValue();
+        minValue = Math.min(minValue, v);
+        maxValue = Math.max(maxValue, v);
+      } else {
+        missing = true;
+      }
+      count++;
+      if (count >= DirectDocValuesFormat.MAX_SORTED_SET_ORDS) {
+        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be < " + DirectDocValuesFormat.MAX_SORTED_SET_ORDS + " values/total ords");
+      }
+    }
+    meta.writeInt((int) count);
+
+    if (missing) {
+      long start = data.getFilePointer();
+      writeMissingBitset(values);
+      meta.writeLong(start);
+      meta.writeLong(data.getFilePointer() - start);
+    } else {
+      meta.writeLong(-1L);
+    }
+
+    byte byteWidth;
+    if (minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
+      byteWidth = 1;
+    } else if (minValue >= Short.MIN_VALUE && maxValue <= Short.MAX_VALUE) {
+      byteWidth = 2;
+    } else if (minValue >= Integer.MIN_VALUE && maxValue <= Integer.MAX_VALUE) {
+      byteWidth = 4;
+    } else {
+      byteWidth = 8;
+    }
+    meta.writeByte(byteWidth);
+
+    for (Number nv : values) {
+      final long v = (nv == null) ? 0 : nv.longValue(); // a missing value is written as 0
+      switch (byteWidth) {
+        case 1:
+          data.writeByte((byte) v);
+          break;
+        case 2:
+          data.writeShort((short) v);
+          break;
+        case 4:
+          data.writeInt((int) v);
+          break;
+        case 8:
+          data.writeLong(v);
+          break;
+      }
+    }
+  }
+
+  /** Writes the -1 end-of-fields marker to the metadata output, then closes both outputs. */
+  @Override
+  public void close() throws IOException {
+    boolean success = false;
+    try {
+      if (meta != null) {
+        meta.writeVInt(-1); // write EOF marker
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(data, meta);
+      } else {
+        IOUtils.closeWhileHandlingException(data, meta);
+      }
+    }
+  }
+
+  @Override
+  public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
+    meta.writeVInt(field.number);
+    meta.writeByte(BYTES);
+    addBinaryFieldValues(field, values);
+  }
+
+  /**
+   * Writes one binary entry: data gets all value bytes back to back, the optional missing
+   * bitset, then count + 1 int addresses (value i spans address i to address i + 1). Throws
+   * IllegalArgumentException past {@link DirectDocValuesFormat#MAX_TOTAL_BYTES_LENGTH} bytes.
+   */
+  private void addBinaryFieldValues(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
+    // write the byte[] data
+    final long startFP = data.getFilePointer();
+    boolean missing = false;
+    long totalBytes = 0;
+    int count = 0;
+    for (BytesRef v : values) {
+      if (v != null) {
+        data.writeBytes(v.bytes, v.offset, v.length);
+        totalBytes += v.length;
+        if (totalBytes > DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH) {
+          throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, cannot have more than DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH (" + DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH + ") bytes");
+        }
+      } else {
+        missing = true;
+      }
+      count++;
+    }
+
+    meta.writeLong(startFP);
+    meta.writeInt((int) totalBytes);
+    meta.writeInt(count);
+    if (missing) {
+      long start = data.getFilePointer();
+      writeMissingBitset(values);
+      meta.writeLong(start);
+      meta.writeLong(data.getFilePointer() - start);
+    } else {
+      meta.writeLong(-1L);
+    }
+
+    int addr = 0;
+    for (BytesRef v : values) {
+      data.writeInt(addr);
+      if (v != null) {
+        addr += v.length;
+      }
+    }
+    data.writeInt(addr);
+  }
+
+  // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
+  // but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode)
+  /**
+   * Writes one bit per value, packed into longs: bit (i % 64) of long (i / 64) is set when
+   * value i is non-null. Nothing is written when there are no values.
+   */
+  void writeMissingBitset(Iterable<?> values) throws IOException {
+    long bits = 0;
+    int count = 0;
+    for (Object v : values) {
+      if (count == 64) {
+        data.writeLong(bits);
+        count = 0;
+        bits = 0;
+      }
+      if (v != null) {
+        bits |= 1L << (count & 0x3f);
+      }
+      count++;
+    }
+    if (count > 0) {
+      data.writeLong(bits);
+    }
+  }
+
+  @Override
+  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+    meta.writeVInt(field.number);
+    meta.writeByte(SORTED);
+
+    // write the ordinals as numerics
+    addNumericFieldValues(field, docToOrd);
+
+    // write the values as binary
+    addBinaryFieldValues(field, values);
+  }
+
+  // note: this might not be the most efficient... but its fairly simple
+  @Override
+  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
+    meta.writeVInt(field.number);
+    meta.writeByte(SORTED_SET);
+
+    // First write docToOrdCounts, except we "aggregate" the
+    // counts so they turn into addresses, and add a final
+    // value = the total aggregate:
+    addNumericFieldValues(field, new Iterable<Number>() {
+      @Override
+      public Iterator<Number> iterator() {
+        final Iterator<Number> iter = docToOrdCount.iterator();
+        return new Iterator<Number>() {
+          long sum;
+          boolean ended;
+
+          @Override
+          public boolean hasNext() {
+            return iter.hasNext() || !ended;
+          }
+
+          @Override
+          public Number next() {
+            long toReturn = sum;
+            if (iter.hasNext()) {
+              Number n = iter.next();
+              if (n != null) {
+                sum += n.longValue();
+              }
+            } else if (!ended) {
+              ended = true;
+            } else {
+              // Iterator contract; a bare assert would return a stale sum when -ea is off.
+              throw new java.util.NoSuchElementException();
+            }
+            return toReturn;
+          }
+
+          @Override
+          public void remove() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+    });
+
+    // Write ordinals for all docs, appended into one big
+    // numerics:
+    addNumericFieldValues(field, ords);
+
+    // write the values as binary
+    addBinaryFieldValues(field, values);
+  }
+}

Property changes on: lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
===================================================================
--- lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (revision 1534125)
+++ lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (working copy)
@@ -15,4 +15,5 @@
 
 org.apache.lucene.codecs.diskdv.DiskDocValuesFormat
 org.apache.lucene.codecs.memory.MemoryDocValuesFormat
+org.apache.lucene.codecs.memory.DirectDocValuesFormat
 org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat