Index: lucene/src/java/org/apache/lucene/index/FieldInfo.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldInfo.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/FieldInfo.java (working copy) @@ -86,7 +86,7 @@ public int getCodecId() { return codecId; } - + @Override public Object clone() { FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, @@ -132,6 +132,12 @@ } } + public void resetDocValues(ValueType v) { + if (docValues != null) { + docValues = v; + } + } + public boolean hasDocValues() { return docValues != null; } Index: lucene/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -33,7 +33,6 @@ import org.apache.lucene.index.codecs.FieldsWriter; import org.apache.lucene.index.codecs.MergeState; import org.apache.lucene.index.codecs.PerDocConsumer; -import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -141,6 +140,8 @@ if (fieldInfos.hasVectors()) { mergeVectors(); } + // write FIS once merge is done. IDV might change types or drops fields + fieldInfos.write(directory, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION); return mergedDocs; } @@ -254,7 +255,6 @@ } } final SegmentCodecs codecInfo = fieldInfos.buildSegmentCodecs(false); - fieldInfos.write(directory, segment + "." 
+ IndexFileNames.FIELD_INFOS_EXTENSION); int docCount = 0; @@ -584,28 +584,11 @@ } private void mergePerDoc() throws IOException { - final List perDocProducers = new ArrayList(); - final List perDocSlices = new ArrayList(); - int docBase = 0; - for (MergeState.IndexReaderAndLiveDocs r : readers) { - final int maxDoc = r.reader.maxDoc(); - final PerDocValues producer = r.reader.perDocValues(); - if (producer != null) { - perDocSlices.add(new ReaderUtil.Slice(docBase, maxDoc, perDocProducers - .size())); - perDocProducers.add(producer); - } - docBase += maxDoc; - } - if (!perDocSlices.isEmpty()) { final PerDocConsumer docsConsumer = codec .docsConsumer(new PerDocWriteState(segmentWriteState)); boolean success = false; try { - final MultiPerDocValues multiPerDocValues = new MultiPerDocValues( - perDocProducers.toArray(PerDocValues.EMPTY_ARRAY), - perDocSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)); - docsConsumer.merge(mergeState, multiPerDocValues); + docsConsumer.merge(mergeState); success = true; } finally { if (success) { @@ -614,11 +597,8 @@ IOUtils.closeWhileHandlingException(docsConsumer); } } - } - /* don't close the perDocProducers here since they are private segment producers - * and will be closed once the SegmentReader goes out of scope */ } - + private MergeState mergeState; public boolean getAnyNonBulkMerges() { Index: lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/codecs/DocValuesConsumer.java (working copy) @@ -92,48 +92,38 @@ * * @param mergeState * the state to merge - * @param values - * the docValues to merge in + * @param docValues docValues array containing one instance per reader ( + * {@link MergeState#readers}) or null if the reader has + * no {@link IndexDocValues} instance. 
* @throws IOException * if an {@link IOException} occurs */ - public void merge(org.apache.lucene.index.codecs.MergeState mergeState, - IndexDocValues values) throws IOException { + public void merge(MergeState mergeState, IndexDocValues[] docValues) throws IOException { assert mergeState != null; - // TODO we need some kind of compatibility notation for values such - // that two slightly different segments can be merged eg. fixed vs. - // variable byte len or float32 vs. float64 - boolean merged = false; - /* - * We ignore the given DocValues here and merge from the subReaders directly - * to support bulk copies on the DocValues Writer level. if this gets merged - * with MultiDocValues the writer can not optimize for bulk-copyable data - */ + boolean hasMerged = false; for(int readerIDX=0;readerIDX fields = producer.fields(); - for (String field : fields) { - mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field); - assert mergeState.fieldInfo != null : "FieldInfo for field is null: " - + field; - if (mergeState.fieldInfo.hasDocValues()) { - final IndexDocValues docValues = producer.docValues(field); - if (docValues == null) { - /* - * It is actually possible that a fieldInfo has a values type but no - * values are actually available. this can happen if there are already - * segments without values around. 
- */ + FieldInfos fieldInfos = mergeState.fieldInfos; + final IndexDocValues[] docValues = new IndexDocValues[mergeState.readers.size()]; + final PerDocValues[] perDocValues = new PerDocValues[mergeState.readers.size()]; + for (int i = 0; i < perDocValues.length; i++) { + perDocValues[i] = mergeState.readers.get(i).reader.perDocValues(); + } + for (FieldInfo fieldInfo : fieldInfos) { + mergeState.fieldInfo = fieldInfo; + TypePromoter promoter = TypePromoter.IDENTITY_PROMOTER; + if (fieldInfo.hasDocValues()) { + for (int i = 0; i < perDocValues.length; i++) { + docValues[i] = null; + if (perDocValues[i] != null) { + docValues[i] = perDocValues[i].docValues(fieldInfo.name); + if (docValues[i] != null) { + ValueType type = docValues[i].type(); + final int size = docValues[i].getValueSize(); + if (promoter != TypePromoter.IDENTITY_PROMOTER) { + System.out.println(promoter + " and + " + TypePromoter.create(type, size) + " to: " + promoter.promote(TypePromoter.create(type, size))); + } + promoter = promoter.promote(TypePromoter.create(type, size)); + } + } + if (promoter == null) { + break; + } + } + if (promoter == null) { + fieldInfo.resetDocValues(null); continue; } + assert promoter != TypePromoter.IDENTITY_PROMOTER; + if (fieldInfo.getDocValues() != promoter.type()) { + fieldInfo.resetDocValues(promoter.type()); + } + final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo); assert docValuesConsumer != null; docValuesConsumer.merge(mergeState, docValues); } } + /* NOTE: don't close the perDocProducers here since they are private segment producers + * and will be closed once the SegmentReader goes out of scope */ } + } Index: lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java (working 
copy) @@ -86,6 +86,12 @@ throws IOException { return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, type()); } + + @Override + public int getValueSize() { + return size; + } + } static final class FixedDerefSource extends BytesSourceBase { Index: lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java (working copy) @@ -104,6 +104,11 @@ return new DirectFixedSortedSource(cloneData(), cloneIndex(), size, valueCount, comparator, type); } + + @Override + public int getValueSize() { + return size; + } } static final class FixedSortedSource extends BytesSortedSourceBase { Index: lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java (working copy) @@ -59,6 +59,7 @@ int version, Counter bytesUsed, IOContext context) throws IOException { super(dir, id, codecName, version, bytesUsed, context); pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed)); + pool.nextBuffer(); } @Override @@ -70,7 +71,6 @@ throw new IllegalArgumentException("bytes arrays > " + Short.MAX_VALUE + " are not supported"); } size = bytes.length; - pool.nextBuffer(); } else if (bytes.length != size) { throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length); @@ -120,7 +120,7 @@ } static class Writer extends FixedBytesWriterBase { - private boolean merge; + private boolean hasMerged; private IndexOutput datOut; public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) throws IOException { @@ -133,12 +133,15 @@ 
@Override - protected void merge(MergeState state) throws IOException { - merge = true; + protected void merge(SingleSubMergeState state) throws IOException { datOut = getOrCreateDataOut(); boolean success = false; try { - if (state.liveDocs == null && state.reader instanceof FixedStraightReader ) { + if (!hasMerged && size != -1) { + datOut.writeInt(size); + } + + if (state.liveDocs == null && tryBulkMerge(state.reader)) { FixedStraightReader reader = (FixedStraightReader) state.reader; final int maxDocs = reader.maxDoc; if (maxDocs == 0) { @@ -172,24 +175,33 @@ if (!success) { IOUtils.closeWhileHandlingException(datOut); } + hasMerged = true; } } + protected boolean tryBulkMerge(IndexDocValues docValues) { + return docValues instanceof FixedStraightReader; + } + @Override protected void mergeDoc(int docID, int sourceDoc) throws IOException { assert lastDocID < docID; - currentMergeSource.getBytes(sourceDoc, bytesRef); + setMergeBytes(sourceDoc); if (size == -1) { size = bytesRef.length; datOut.writeInt(size); } - assert size == bytesRef.length; + assert size == bytesRef.length : "size: " + size + " ref: " + bytesRef.length; if (lastDocID+1 < docID) { fill(datOut, docID); } datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); lastDocID = docID; } + + protected void setMergeBytes(int sourceDoc) { + currentMergeSource.getBytes(sourceDoc, bytesRef); + } @@ -203,7 +215,7 @@ public void finish(int docCount) throws IOException { boolean success = false; try { - if (!merge) { + if (!hasMerged) { // indexing path - no disk IO until here assert datOut == null; datOut = getOrCreateDataOut(); @@ -267,6 +279,11 @@ public Source getDirectSource() throws IOException { return new DirectFixedStraightSource(cloneData(), size, type()); } + + @Override + public int getValueSize() { + return size; + } } // specialized version for single bytes Index: lucene/src/java/org/apache/lucene/index/values/Floats.java 
=================================================================== --- lucene/src/java/org/apache/lucene/index/values/Floats.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/Floats.java (working copy) @@ -85,6 +85,18 @@ public void add(int docID, PerDocFieldValues docValues) throws IOException { add(docID, docValues.getFloat()); } + + @Override + protected boolean tryBulkMerge(IndexDocValues docValues) { + // only bulk merge if value type is the same otherwise size differs + return super.tryBulkMerge(docValues) && docValues.type() == template.type(); + } + + @Override + protected void setMergeBytes(int sourceDoc) { + final double value = currentMergeSource.getFloat(sourceDoc); + template.toBytes(value, bytesRef); + } } final static class FloatsReader extends FixedStraightBytesImpl.FixedStraightReader { Index: lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/IndexDocValues.java (working copy) @@ -107,6 +107,17 @@ } /** + * Returns the size per value in bytes or -1 iff size per value + * is variable. + * + * @return the size per value in bytes or -1 iff size per value + * is variable. + */ + public int getValueSize() { + return -1; + } + + /** * Sets the {@link SourceCache} used by this {@link IndexDocValues} instance. This * method should be called before {@link #load()} is called. All {@link Source} instances in the currently used cache will be closed * before the new cache is installed. 
Index: lucene/src/java/org/apache/lucene/index/values/Ints.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/Ints.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/Ints.java (working copy) @@ -93,9 +93,9 @@ protected IntsWriter(Directory dir, String id, String codecName, int version, Counter bytesUsed, IOContext context, ValueType valueType) throws IOException { super(dir, id, codecName, version, bytesUsed, context); - final int expectedSize = typeToSize(valueType); - this.bytesRef = new BytesRef(expectedSize); - bytesRef.length = expectedSize; + size = typeToSize(valueType); + this.bytesRef = new BytesRef(size); + bytesRef.length = size; template = IndexDocValuesArray.TEMPLATES.get(valueType); } @@ -109,6 +109,18 @@ public void add(int docID, PerDocFieldValues docValues) throws IOException { add(docID, docValues.getInt()); } + + @Override + protected void setMergeBytes(int sourceDoc) { + final long value = currentMergeSource.getInt(sourceDoc); + template.toBytes(value, bytesRef); + } + + @Override + protected boolean tryBulkMerge(IndexDocValues docValues) { + // only bulk merge if value type is the same otherwise size differs + return super.tryBulkMerge(docValues) && docValues.type() == template.type(); + } } final static class IntsReader extends FixedStraightBytesImpl.FixedStraightReader { Index: lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/MultiIndexDocValues.java (working copy) @@ -46,6 +46,8 @@ private DocValuesIndex[] docValuesIdx; private int[] starts; + private ValueType type; + private int valueSize; public MultiIndexDocValues() { starts = new int[0]; @@ -63,9 +65,17 @@ public IndexDocValues reset(DocValuesIndex[] 
docValuesIdx) { int[] start = new int[docValuesIdx.length]; + TypePromoter promoter = TypePromoter.IDENTITY_PROMOTER; for (int i = 0; i < docValuesIdx.length; i++) { start[i] = docValuesIdx[i].start; + TypePromoter incomingPromoter = TypePromoter.create(docValuesIdx[i].docValues.type(), docValuesIdx[i].docValues.getValueSize()); + promoter = promoter.promote(incomingPromoter); + if (promoter == null) { + throw new IllegalStateException("Can not promote " + incomingPromoter); + } } + this.type = promoter.type; + this.valueSize = promoter.getValueSize(); this.starts = start; this.docValuesIdx = docValuesIdx; return this; @@ -180,7 +190,12 @@ @Override public ValueType type() { - return this.docValuesIdx[0].docValues.type(); + return type; + } + + @Override + public int getValueSize() { + return valueSize; } @Override Index: lucene/src/java/org/apache/lucene/index/values/TypePromoter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/values/TypePromoter.java (revision 0) @@ -0,0 +1,149 @@ +package org.apache.lucene.index.values; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Computes the {@link ValueType} that two doc-values types can both be
+ * represented as, for instance when segments that hold different types for the
+ * same field are merged.
+ * <p>
+ * Every {@link ValueType} is encoded as a set of bit flags. The flags are laid
+ * out such that the bitwise AND of two flag sets is the flag set of the common
+ * type: the "fixed" flag contains the "var" bit, the "deref" flag contains the
+ * "straight" bit and every bit-width flag contains the bits of all wider
+ * widths. If the AND does not correspond to any {@link ValueType}, the two
+ * types are incompatible and {@link #promote(TypePromoter)} returns
+ * <code>null</code>.
+ *
+ * @lucene.internal
+ */
+public class TypePromoter {
+
+  /**
+   * Neutral start element of a promotion chain: promoting it with any other
+   * promoter returns that other promoter unchanged.
+   */
+  public static final TypePromoter IDENTITY_PROMOTER = new IdentityTypePromoter();
+
+  /** Value size reported for types whose size per value is variable. */
+  private static final int VAR_VALUE_SIZE = -1;
+
+  // value kind - two types of different kinds share no kind bit
+  private static final int IS_INT = 1 << 0;
+  private static final int IS_BYTE = 1 << 1;
+  private static final int IS_FLOAT = 1 << 2;
+  // size per value - fixed AND var == var
+  private static final int IS_FIXED = 1 << 3 | 1 << 4;
+  private static final int IS_VAR = 1 << 4;
+  // storage layout - deref AND straight == straight, sorted shares no bit
+  private static final int IS_STRAIGHT = 1 << 5;
+  private static final int IS_DEREF = 1 << 5 | 1 << 6;
+  private static final int IS_SORTED = 1 << 7;
+  // numeric width - the AND of two widths is the wider one
+  private static final int IS_8_BIT = 1 << 8 | 1 << 9 | 1 << 10 | 1 << 11;
+  private static final int IS_16_BIT = 1 << 9 | 1 << 10 | 1 << 11;
+  private static final int IS_32_BIT = 1 << 10 | 1 << 11;
+  private static final int IS_64_BIT = 1 << 11;
+  // clears the bit that distinguishes IS_FIXED from IS_VAR
+  private static final int PROMOTE_TO_VAR_SIZE_MASK = ~(1 << 3);
+
+  protected final ValueType type;
+  protected final int flags;
+  protected final int valueSize;
+
+  /** Maps the flag set of every {@link ValueType} back to its type. */
+  static final Map<Integer, ValueType> FLAGS_MAP = new HashMap<Integer, ValueType>();
+
+  static {
+    for (ValueType type : ValueType.values()) {
+      TypePromoter create = create(type);
+      FLAGS_MAP.put(create.flags, type);
+    }
+  }
+
+  /**
+   * Creates a new promoter.
+   *
+   * @param type
+   *          the type this promoter represents
+   * @param flags
+   *          the flag set encoding the type
+   * @param valueSize
+   *          the size per value in bytes or <code>-1</code> if the size is
+   *          variable
+   */
+  public TypePromoter(ValueType type, int flags, int valueSize) {
+    this.type = type;
+    this.flags = flags;
+    this.valueSize = valueSize;
+  }
+
+  /**
+   * Returns the size per value in bytes or <code>-1</code> if the size per
+   * value is variable.
+   *
+   * @return the size per value in bytes or <code>-1</code>
+   */
+  public int getValueSize() {
+    return valueSize;
+  }
+
+  /**
+   * Returns the {@link ValueType} this promoter represents.
+   *
+   * @return the type, <code>null</code> for {@link #IDENTITY_PROMOTER}
+   */
+  public ValueType type() {
+    return type;
+  }
+
+  /**
+   * Creates a promoter for the given type with a variable value size.
+   *
+   * @param type
+   *          the type to create a promoter for, may be <code>null</code>
+   * @return the promoter or <code>null</code> if the type is <code>null</code>
+   */
+  public static TypePromoter create(ValueType type) {
+    return create(type, VAR_VALUE_SIZE);
+  }
+
+  /**
+   * Creates a promoter for the given type and value size.
+   *
+   * @param type
+   *          the type to create a promoter for, may be <code>null</code>
+   * @param valueSize
+   *          the size per value in bytes or <code>-1</code> if variable
+   * @return the promoter or <code>null</code> if the type is <code>null</code>
+   * @throws IllegalStateException
+   *           if the type is not handled by this class
+   */
+  public static TypePromoter create(ValueType type, int valueSize) {
+    if (type == null) {
+      return null;
+    }
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_DEREF, valueSize);
+    case BYTES_FIXED_SORTED:
+      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_SORTED, valueSize);
+    case BYTES_FIXED_STRAIGHT:
+      return new TypePromoter(type, IS_BYTE | IS_FIXED | IS_STRAIGHT, valueSize);
+    case BYTES_VAR_DEREF:
+      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_DEREF, valueSize);
+    case BYTES_VAR_SORTED:
+      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_SORTED, valueSize);
+    case BYTES_VAR_STRAIGHT:
+      return new TypePromoter(type, IS_BYTE | IS_VAR | IS_STRAIGHT, valueSize);
+    case FIXED_INTS_16:
+      return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_16_BIT, valueSize);
+    case FIXED_INTS_32:
+      return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
+    case FIXED_INTS_64:
+      return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
+    case FIXED_INTS_8:
+      return new TypePromoter(type, IS_INT | IS_FIXED | IS_STRAIGHT | IS_8_BIT, valueSize);
+    case FLOAT_32:
+      return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT | IS_32_BIT, valueSize);
+    case FLOAT_64:
+      return new TypePromoter(type, IS_FLOAT | IS_FIXED | IS_STRAIGHT | IS_64_BIT, valueSize);
+    case VAR_INTS:
+      return new TypePromoter(type, IS_INT | IS_VAR | IS_STRAIGHT, valueSize);
+    default:
+      throw new IllegalStateException("unknown ValueType: " + type);
+    }
+  }
+
+  /**
+   * Promotes this type and the given type to their common type.
+   * <p>
+   * The returned promoter reports a value size of <code>-1</code> whenever the
+   * common type is variable-size. Fixed-size byte types are only kept fixed if
+   * both sides use the same value size, otherwise the variable-size variant of
+   * the common type is returned. For fixed-size numeric types the value size
+   * of the side that already has the common (wider) type is used.
+   *
+   * @param promoter
+   *          the promoter to combine with this one
+   * @return a promoter for the common type or <code>null</code> if the two
+   *         types have no common type
+   */
+  public TypePromoter promote(TypePromoter promoter) {
+    final int promotedFlags = promoter.flags & this.flags;
+    final ValueType promotedType = FLAGS_MAP.get(promotedFlags);
+    if (promotedType == null) {
+      return null; // the flag intersection is not a valid type
+    }
+    // IS_FIXED contains the IS_VAR bit, so "!= 0" would match var types too
+    if ((promotedFlags & IS_FIXED) != IS_FIXED) {
+      return create(promotedType, VAR_VALUE_SIZE);
+    }
+    if ((promotedFlags & IS_BYTE) != 0) {
+      if (this.valueSize == promoter.valueSize) {
+        return create(promotedType, valueSize);
+      }
+      // fixed-size bytes of different length can only be merged as var bytes
+      return create(FLAGS_MAP.get(promotedFlags & PROMOTE_TO_VAR_SIZE_MASK),
+          VAR_VALUE_SIZE);
+    }
+    // fixed numeric types: the common type is the type of one of both sides
+    return create(promotedType,
+        promotedType == this.type ? this.valueSize : promoter.valueSize);
+  }
+
+  @Override
+  public String toString() {
+    return "TypePromoter [type=" + type + ", sizeInBytes=" + valueSize + "]";
+  }
+
+  /**
+   * Promoter without a type; {@link #promote(TypePromoter)} returns its
+   * argument.
+   */
+  private static class IdentityTypePromoter extends TypePromoter {
+
+    public IdentityTypePromoter() {
+      super(null, 0, VAR_VALUE_SIZE);
+    }
+
+    @Override
+    public TypePromoter promote(TypePromoter promoter) {
+      return promoter;
+    }
+  }
+}
\ No
newline at end of file Property changes on: lucene/src/java/org/apache/lucene/index/values/TypePromoter.java ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Date Author Id Revision HeadURL Index: lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java (working copy) @@ -93,7 +93,7 @@ } @Override - protected void merge(MergeState state) throws IOException { + protected void merge(SingleSubMergeState state) throws IOException { merge = true; datOut = getOrCreateDataOut(); boolean success = false; Index: lucene/src/java/org/apache/lucene/index/values/Writer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/Writer.java (revision 1180123) +++ lucene/src/java/org/apache/lucene/index/values/Writer.java (working copy) @@ -138,7 +138,7 @@ public abstract void finish(int docCount) throws IOException; @Override - protected void merge(MergeState state) throws IOException { + protected void merge(SingleSubMergeState state) throws IOException { // This enables bulk copies in subclasses per MergeState, subclasses can // simply override this and decide if they want to merge // segments using this generic implementation or if a bulk merge is possible Index: lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java =================================================================== --- lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (revision 0) +++ lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java (revision 0) @@ -0,0 +1,286 @@ +package org.apache.lucene.index.values; + +import java.io.IOException; +import java.util.EnumSet; +import 
java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.IndexDocValuesField; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LogDocMergePolicy; +import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.values.IndexDocValues.Source; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */
+
+/**
+ * Tests that doc values of different but compatible types indexed for the
+ * same field are promoted to a common type when their segments are merged, and
+ * that the values are dropped if the types are incompatible.
+ */
+public class TestTypePromotion extends LuceneTestCase {
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    assumeFalse("cannot work with preflex codec", CodecProvider.getDefault()
+        .getDefaultFieldCodec().equals("PreFlex"));
+  }
+
+  private static final EnumSet<ValueType> INTEGERS = EnumSet.of(
+      ValueType.VAR_INTS, ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
+      ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_8);
+
+  private static final EnumSet<ValueType> FLOATS = EnumSet.of(
+      ValueType.FLOAT_32, ValueType.FLOAT_64);
+
+  private static final EnumSet<ValueType> UNSORTED_BYTES = EnumSet.of(
+      ValueType.BYTES_FIXED_DEREF, ValueType.BYTES_FIXED_STRAIGHT,
+      ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_DEREF);
+
+  private static final EnumSet<ValueType> SORTED_BYTES = EnumSet.of(
+      ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);
+
+  /**
+   * Picks a random type from the given set.
+   */
+  public ValueType randomValueType(EnumSet<ValueType> typeEnum, Random random) {
+    ValueType[] array = typeEnum.toArray(new ValueType[0]);
+    return array[random.nextInt(array.length)];
+  }
+
+  /** Selects how {@link #runTest} reads the values back for verification. */
+  private static enum TestType {
+    Int, Float, Byte
+  }
+
+  /**
+   * Indexes three batches of documents with a doc-values field named
+   * "promote", each batch using a type picked randomly from the given set,
+   * optimizes the index and checks every value through the direct source of
+   * the single remaining segment.
+   *
+   * @param types
+   *          the types to pick from, expected to be promotable to each other
+   * @param type
+   *          how the expected values are compared to the merged values
+   */
+  private void runTest(EnumSet<ValueType> types, TestType type)
+      throws CorruptIndexException, IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+        writerConfig(random.nextBoolean()));
+    int num_1 = atLeast(200);
+    int num_2 = atLeast(200);
+    int num_3 = atLeast(200);
+    long[] values = new long[num_1 + num_2 + num_3];
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(types, random), values, 0, num_1);
+    writer.commit();
+
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(types, random), values, num_1, num_2);
+    writer.commit();
+
+    if (random.nextInt(4) == 0) {
+      // once in a while use addIndexes
+      writer.optimize();
+
+      Directory dir_2 = newDirectory();
+      IndexWriter writer_2 = new IndexWriter(dir_2,
+          writerConfig(random.nextBoolean()));
+      index(writer_2, new IndexDocValuesField("promote"),
+          randomValueType(types, random), values, num_1 + num_2, num_3);
+      writer_2.commit();
+      writer_2.close();
+      if (random.nextBoolean()) {
+        writer.addIndexes(dir_2);
+      } else {
+        // do a real merge here
+        IndexReader open = IndexReader.open(dir_2);
+        writer.addIndexes(open);
+        open.close();
+      }
+      dir_2.close();
+    } else {
+      index(writer, new IndexDocValuesField("promote"),
+          randomValueType(types, random), values, num_1 + num_2, num_3);
+    }
+
+    writer.optimize();
+    writer.close();
+    IndexReader reader = IndexReader.open(dir);
+    assertTrue(reader.isOptimized());
+    ReaderContext topReaderContext = reader.getTopReaderContext();
+    ReaderContext[] children = topReaderContext.children();
+    // check the segment count before the first child is dereferenced
+    assertEquals(1, children.length);
+    IndexDocValues docValues = children[0].reader.docValues("promote");
+    Source directSource = docValues.getDirectSource();
+    for (int i = 0; i < values.length; i++) {
+      switch (type) {
+      case Byte:
+        // for byte types only the length of the indexed bytes is recorded
+        assertEquals("" + i, values[i], directSource.getBytes(i, new BytesRef()).length);
+        break;
+      case Float:
+        assertEquals("" + i, Double.longBitsToDouble(values[i]), directSource.getFloat(i), 0.0);
+        break;
+      case Int:
+        assertEquals("" + i, values[i], directSource.getInt(i));
+        break;
+      default:
+        break;
+      }
+    }
+    docValues.close();
+    reader.close();
+    dir.close();
+  }
+
+  /**
+   * Creates a writer config that uses a {@link LogDocMergePolicy} with the
+   * given compound file setting.
+   */
+  private IndexWriterConfig writerConfig(boolean useCompoundFile) {
+    final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
+        new MockAnalyzer(random));
+    cfg.setMergePolicy(newLogMergePolicy(random));
+    LogMergePolicy policy = new LogDocMergePolicy();
+    cfg.setMergePolicy(policy);
+    policy.setUseCompoundFile(useCompoundFile);
+    return cfg;
+  }
+
+  /**
+   * Adds <code>num</code> documents carrying the given doc-values field with
+   * random values of the given type and commits randomly in between.
+   * <p>
+   * The expected value of each document is stored in
+   * <code>values[offset .. offset + num - 1]</code>: the integer value for
+   * integer types, the raw long bits of the value as a double for float types
+   * and the length of the bytes for byte types.
+   */
+  public void index(IndexWriter writer, IndexDocValuesField valField,
+      ValueType valueType, long[] values, int offset, int num)
+      throws CorruptIndexException, IOException {
+    BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 });
+    for (int i = offset; i < offset + num; i++) {
+      Document doc = new Document();
+
+      switch (valueType) {
+      case VAR_INTS:
+        values[i] = random.nextInt();
+        valField.setInt(values[i]);
+        break;
+      case FIXED_INTS_16:
+        values[i] = random.nextInt(Short.MAX_VALUE);
+        valField.setInt((short) values[i], true);
+        break;
+      case FIXED_INTS_32:
+        values[i] = random.nextInt();
+        valField.setInt((int) values[i], true);
+        break;
+      case FIXED_INTS_64:
+        values[i] = random.nextLong();
+        valField.setInt(values[i], true);
+        break;
+      case FLOAT_64:
+        double nextDouble = random.nextDouble();
+        values[i] = Double.doubleToRawLongBits(nextDouble);
+        valField.setFloat(nextDouble);
+        break;
+      case FLOAT_32:
+        float nextFloat = random.nextFloat();
+        // the float is widened to a double before its bits are recorded
+        values[i] = Double.doubleToRawLongBits(nextFloat);
+        valField.setFloat(nextFloat);
+        break;
+      case FIXED_INTS_8:
+        values[i] = (byte) i;
+        valField.setInt((byte) i, true);
+        break;
+      case BYTES_FIXED_DEREF:
+      case BYTES_FIXED_SORTED:
+      case BYTES_FIXED_STRAIGHT:
+        values[i] = ref.length = 4;
+        valField.setBytes(ref, valueType);
+        break;
+      case BYTES_VAR_DEREF:
+      case BYTES_VAR_SORTED:
+      case BYTES_VAR_STRAIGHT:
+        values[i] = ref.length = 1 + random.nextInt(4);
+        valField.setBytes(ref, valueType);
+        break;
+
+      default:
+        fail("unexpected value " + valueType);
+
+      }
+      doc.add(valField);
+      writer.addDocument(doc);
+      if (random.nextInt(10) == 0) {
+        writer.commit();
+      }
+    }
+  }
+
+  public void testPromoteBytes() throws IOException {
+    runTest(UNSORTED_BYTES, TestType.Byte);
+  }
+
+  public void testSortedPromoteBytes() throws IOException {
+    runTest(SORTED_BYTES, TestType.Byte);
+  }
+
+  public void testPromotInteger() throws IOException {
+    runTest(INTEGERS, TestType.Int);
+  }
+
+  public void testPromotFloatingPoint() throws CorruptIndexException,
+      IOException {
+    runTest(FLOATS, TestType.Float);
+  }
+
+  /**
+   * Indexes the same field with an integer type, an unsorted byte type and
+   * either a float or a sorted byte type and checks that the optimized segment
+   * has no doc values for the field.
+   */
+  public void testDropOnMerge() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+        writerConfig(random.nextBoolean()));
+    int num_1 = atLeast(200);
+    int num_2 = atLeast(200);
+    int num_3 = atLeast(200);
+    long[] values = new long[num_1 + num_2 + num_3];
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(INTEGERS, random), values, 0, num_1);
+    writer.commit();
+
+    index(writer, new IndexDocValuesField("promote"),
+        randomValueType(UNSORTED_BYTES, random), values, num_1, num_2);
+    writer.commit();
+
+    if (random.nextInt(4) == 0) {
+      // once in a while use addIndexes
+      writer.optimize();
+
+      Directory dir_2 = newDirectory();
+      IndexWriter writer_2 = new IndexWriter(dir_2,
+          writerConfig(random.nextBoolean()));
+      index(writer_2, new IndexDocValuesField("promote"),
+          randomValueType(FLOATS, random), values, num_1 + num_2, num_3);
+      writer_2.commit();
+      writer_2.close();
+      if (random.nextBoolean()) {
+        writer.addIndexes(dir_2);
+      } else {
+        // do a real merge here
+        IndexReader open = IndexReader.open(dir_2);
+        writer.addIndexes(open);
+        open.close();
+      }
+      dir_2.close();
+    } else {
+      index(writer, new IndexDocValuesField("promote"),
+          randomValueType(SORTED_BYTES, random), values, num_1 + num_2, num_3);
+      writer.commit();
+    }
+
+    writer.optimize();
+    writer.close();
+    IndexReader reader = IndexReader.open(dir);
+    assertTrue(reader.isOptimized());
+    ReaderContext topReaderContext = reader.getTopReaderContext();
+    ReaderContext[] children = topReaderContext.children();
+    IndexDocValues docValues = children[0].reader.docValues("promote");
+    assertNull(docValues); // no docvalues - dropped
+    reader.close();
+    dir.close();
+  }
+
+}

Property changes on: lucene/src/test/org/apache/lucene/index/values/TestTypePromotion.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL