Index: lucene/facet/build.xml =================================================================== --- lucene/facet/build.xml (revision 1445108) +++ lucene/facet/build.xml (working copy) @@ -31,7 +31,7 @@ - + Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java (working copy) @@ -0,0 +1,47 @@ +package org.apache.lucene.facet.codecs.facet42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.packed.PackedInts; + +class Facet42BinaryDocValues extends BinaryDocValues { + + private final byte[] bytes; + private final PackedInts.Reader addresses; + + Facet42BinaryDocValues(DataInput in) throws IOException { + int totBytes = in.readVInt(); + bytes = new byte[totBytes]; + in.readBytes(bytes, 0, totBytes); + addresses = PackedInts.getReader(in); + } + + @Override + public void get(int docID, BytesRef ret) { + int start = (int) addresses.get(docID); + ret.bytes = bytes; + ret.offset = start; + ret.length = (int) (addresses.get(docID+1)-start); + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java (working copy) @@ -0,0 +1,74 @@ +package org.apache.lucene.facet.codecs.facet42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.lucene42.Lucene42Codec; +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.params.FacetIndexingParams; + +/** + * Same as {@link Lucene42Codec} except it uses {@link Facet42DocValuesFormat} + * for facet fields (faster-but-more-RAM-consuming doc values). + * + *

+ * NOTE: this codec does not support facet partitions (see + * {@link FacetIndexingParams#getPartitionSize()}). + * + * @lucene.experimental + */ +public final class Facet42Codec extends Lucene42Codec { + + private final Set facetFields; + private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName("Facet42"); + private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42"); + + // must have that for SPI purposes + /** Default constructor, uses {@link FacetIndexingParams#ALL_PARENTS}. */ + public Facet42Codec() { + this(FacetIndexingParams.ALL_PARENTS); + } + + /** + * Initializes with the given {@link FacetIndexingParams}. Returns the proper + * {@link DocValuesFormat} for the fields that are returned by + * {@link FacetIndexingParams#getAllCategoryListParams()}. + */ + public Facet42Codec(FacetIndexingParams fip) { + if (fip.getPartitionSize() != Integer.MAX_VALUE) { + throw new IllegalArgumentException("this Codec does not support partitions"); + } + this.facetFields = new HashSet(); + for (CategoryListParams clp : fip.getAllCategoryListParams()) { + facetFields.add(clp.field); + } + } + + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + if (facetFields.contains(field)) { + return facetsDVFormat; + } else { + return lucene42DVFormat; + } + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java (working copy) @@ -0,0 +1,113 @@ +package 
org.apache.lucene.facet.codecs.facet42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.packed.PackedInts; + +/** writer for {@link Facet42DocValuesFormat} */ +public class Facet42DocValuesConsumer extends DocValuesConsumer { + + final IndexOutput out; + final int maxDoc; + + public Facet42DocValuesConsumer(SegmentWriteState state) throws IOException { + boolean success = false; + try { + String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION); + out = state.directory.createOutput(fileName, state.context); + CodecUtil.writeHeader(out, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_CURRENT); + maxDoc = state.segmentInfo.getDocCount(); + success = true; + } finally { + if (!success) { + 
IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public void addNumericField(FieldInfo field, Iterable values) throws IOException { + throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields"); + } + + @Override + public void addBinaryField(FieldInfo field, final Iterable values) throws IOException { + // write the byte[] data + out.writeVInt(field.number); + + long totBytes = 0; + for (BytesRef v : values) { + totBytes += v.length; + } + + if (totBytes > Integer.MAX_VALUE) { + // nocommit fixme: + throw new IllegalStateException(); + } + + out.writeVInt((int) totBytes); + + // nocommit treat int[] ords as long int[] contatenated + // from all docs? + for (BytesRef v : values) { + out.writeBytes(v.bytes, v.offset, v.length); + } + + // nocommit make FASTEST controllable + PackedInts.Writer w = PackedInts.getWriter(out, maxDoc+1, PackedInts.bitsRequired(totBytes+1), PackedInts.FASTEST); + + int address = 0; + for(BytesRef v : values) { + w.add(address); + address += v.length; + } + w.add(address); + w.finish(); + } + + @Override + public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException { + throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields"); + } + + @Override + public void close() throws IOException { + boolean success = false; + try { + out.writeVInt(-1); // write EOF marker + success = true; + } finally { + if (success) { + IOUtils.close(out); + } else { + IOUtils.closeWhileHandlingException(out); + } + } + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesFormat.java =================================================================== --- 
lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesFormat.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesFormat.java (working copy) @@ -0,0 +1,56 @@ +package org.apache.lucene.facet.codecs.facet42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +/** + * DocValues format that only handles binary doc values and + * is optimized for usage with facets. It uses more RAM than other + * formats in exchange for faster lookups. 
+ * + * @lucene.experimental + */ +public final class Facet42DocValuesFormat extends DocValuesFormat { + + public static final String CODEC = "FacetsDocValues"; + public static final String EXTENSION = "fdv"; + public static final int VERSION_START = 0; + public static final int VERSION_CURRENT = VERSION_START; + + public Facet42DocValuesFormat() { + super("Facets42"); + } + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + return new Facet42DocValuesConsumer(state); + } + + @Override + public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { + return new Facet42DocValuesProducer(state); + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesFormat.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java (working copy) @@ -0,0 +1,81 @@ +package org.apache.lucene.facet.codecs.facet42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; + +class Facet42DocValuesProducer extends DocValuesProducer { + + private final Map fields = new HashMap(); + + Facet42DocValuesProducer(SegmentReadState state) throws IOException { + String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION); + IndexInput in = state.directory.openInput(fileName, state.context); + boolean success = false; + try { + CodecUtil.checkHeader(in, Facet42DocValuesFormat.CODEC, + Facet42DocValuesFormat.VERSION_START, + Facet42DocValuesFormat.VERSION_START); + int fieldNumber = in.readVInt(); + while (fieldNumber != -1) { + fields.put(fieldNumber, new Facet42BinaryDocValues(in)); + fieldNumber = in.readVInt(); + } + success = true; + } finally { + if (success) { + IOUtils.close(in); + } else { + IOUtils.closeWhileHandlingException(in); + } + } + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + throw new UnsupportedOperationException("FacetsDocValues only implements 
binary"); + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + return fields.get(field.number); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + throw new UnsupportedOperationException("FacetsDocValues only implements binary"); + } + + @Override + public void close() throws IOException { + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html (working copy) @@ -0,0 +1,25 @@ + + + + + + + +Codec + DocValuesFormat that are optimized for facets. 
+ + Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (revision 1445108) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (working copy) @@ -68,9 +68,10 @@ if (dv == null) { // this reader does not have DocValues for the requested category list return; } - + final int length = matchingDocs.bits.length(); final int[] counts = facetArrays.getIntArray(); + int doc = 0; while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { dv.get(doc, buf); Index: lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat =================================================================== --- lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (revision 0) +++ lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (working copy) @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.facet.codecs.facet42.Facet42DocValuesFormat Property changes on: lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (revision 1445108) +++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (working copy) @@ -16,12 +16,13 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; +import org.apache.lucene.facet.codecs.facet42.Facet42Codec; import org.apache.lucene.facet.collections.IntToObjectMap; import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy; import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; @@ -33,8 +34,8 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; +import 
org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -180,7 +181,8 @@ /** Returns indexing params for the main index */ protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) { - return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + // nocommit + return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(new Facet42Codec()); /* the returned config always carries a Facet42Codec built with its default constructor */ } /** Returns a {@link FacetIndexingParams} per the given partition size. */ Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java (revision 1445108) +++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java (working copy) @@ -2,6 +2,8 @@ import java.util.Random; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.facet.codecs.facet42.Facet42Codec; import org.apache.lucene.facet.encoding.DGapIntEncoder; import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder; import org.apache.lucene.facet.encoding.EightFlagsIntEncoder; @@ -13,6 +15,8 @@ import org.apache.lucene.facet.encoding.VInt8IntEncoder; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.util.LuceneTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -43,6 +47,24 @@ new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(4)))), }; + private static Codec savedDefault = null; /* default codec to put back in afterClassFacetTestCase; stays null when the default was not replaced */ + + @BeforeClass /* when random().nextDouble() < 0.3, saves the current default codec and makes a new Facet42Codec the default */ + public static void beforeClassFacetTestCase() throws Exception { + if (random().nextDouble() < 0.3) { + savedDefault = Codec.getDefault(); // save to restore later + Codec.setDefault(new 
Facet42Codec()); + } + } + + @AfterClass /* puts back the default codec saved by beforeClassFacetTestCase, if one was saved, and clears the saved reference */ + public static void afterClassFacetTestCase() throws Exception { + if (savedDefault != null) { + Codec.setDefault(savedDefault); + savedDefault = null; + } + } + /** Returns a {@link CategoryListParams} with random {@link IntEncoder} and field. */ public static CategoryListParams randomCategoryListParams() { final String field = CategoryListParams.DEFAULT_FIELD + "$" + random().nextInt();