Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java (revision 1445183)
+++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java (working copy)
@@ -2,6 +2,8 @@
import java.util.Random;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.facet.codecs.facet42.Facet42Codec;
import org.apache.lucene.facet.encoding.DGapIntEncoder;
import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
import org.apache.lucene.facet.encoding.EightFlagsIntEncoder;
@@ -13,6 +15,8 @@
import org.apache.lucene.facet.encoding.VInt8IntEncoder;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.util.LuceneTestCase;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -43,6 +47,24 @@
new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(4)))),
};
+ private static Codec savedDefault = null;
+
+ @BeforeClass
+ public static void beforeClassFacetTestCase() throws Exception {
+ if (random().nextDouble() < 0.3) {
+ savedDefault = Codec.getDefault(); // save to restore later
+ Codec.setDefault(new Facet42Codec());
+ }
+ }
+
+ @AfterClass
+ public static void afterClassFacetTestCase() throws Exception {
+ if (savedDefault != null) {
+ Codec.setDefault(savedDefault);
+ savedDefault = null;
+ }
+ }
+
/** Returns a {@link CategoryListParams} with random {@link IntEncoder} and field. */
public static CategoryListParams randomCategoryListParams() {
final String field = CategoryListParams.DEFAULT_FIELD + "$" + random().nextInt();
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java (working copy)
@@ -0,0 +1,81 @@
+package org.apache.lucene.facet.codecs.facet42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.facet.params.CategoryListParams;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+
+/**
+ * Same as {@link Lucene42Codec} except it uses {@link Facet42DocValuesFormat}
+ * for facet fields (faster-but-more-RAM-consuming doc values).
+ *
+ *
+ * NOTE: this codec does not support facet partitions (see
+ * {@link FacetIndexingParams#getPartitionSize()}).
+ *
+ *
+ * NOTE: this format cannot handle more than 2 GB
+ * of facet data in a single segment. If your usage may hit
+ * this limit, you can either use Lucene's default
+ * DocValuesFormat, limit the maximum segment size in your
+ * MergePolicy, or send us a patch fixing the limitation.
+ *
+ * @lucene.experimental
+ */
+public class Facet42Codec extends Lucene42Codec {
+
+ private final Set facetFields;
+ private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName("Facet42");
+ private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42");
+
+ // must have that for SPI purposes
+ /** Default constructor, uses {@link FacetIndexingParams#ALL_PARENTS}. */
+ public Facet42Codec() {
+ this(FacetIndexingParams.ALL_PARENTS);
+ }
+
+ /**
+ * Initializes with the given {@link FacetIndexingParams}. Returns the proper
+ * {@link DocValuesFormat} for the fields that are returned by
+ * {@link FacetIndexingParams#getAllCategoryListParams()}.
+ */
+ public Facet42Codec(FacetIndexingParams fip) {
+ if (fip.getPartitionSize() != Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("this Codec does not support partitions");
+ }
+ this.facetFields = new HashSet();
+ for (CategoryListParams clp : fip.getAllCategoryListParams()) {
+ facetFields.add(clp.field);
+ }
+ }
+
+ @Override
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ if (facetFields.contains(field)) {
+ return facetsDVFormat;
+ } else {
+ return lucene42DVFormat;
+ }
+ }
+
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42Codec.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesFormat.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesFormat.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesFormat.java (working copy)
@@ -0,0 +1,63 @@
+package org.apache.lucene.facet.codecs.facet42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+/**
+ * DocValues format that only handles binary doc values and
+ * is optimized for usage with facets. It uses more RAM than other
+ * formats in exchange for faster lookups.
+ *
+ *
+ * NOTE: this format cannot handle more than 2 GB
+ * of facet data in a single segment. If your usage may hit
+ * this limit, you can either use Lucene's default
+ * DocValuesFormat, limit the maximum segment size in your
+ * MergePolicy, or send us a patch fixing the limitation.
+ *
+ * @lucene.experimental
+ */
+public final class Facet42DocValuesFormat extends DocValuesFormat {
+
+ public static final String CODEC = "FacetsDocValues";
+ public static final String EXTENSION = "fdv";
+ public static final int VERSION_START = 0;
+ public static final int VERSION_CURRENT = VERSION_START;
+
+ public Facet42DocValuesFormat() {
+ super("Facet42");
+ }
+
+ @Override
+ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+ return new Facet42DocValuesConsumer(state);
+ }
+
+ @Override
+ public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
+ return new Facet42DocValuesProducer(state);
+ }
+
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesFormat.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java (working copy)
@@ -0,0 +1,115 @@
+package org.apache.lucene.facet.codecs.facet42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.PackedInts;
+
+/** Writer for {@link Facet42DocValuesFormat}. */
+public class Facet42DocValuesConsumer extends DocValuesConsumer {
+
+ final IndexOutput out;
+ final int maxDoc;
+ final float acceptableOverheadRatio;
+
+ public Facet42DocValuesConsumer(SegmentWriteState state) throws IOException {
+ this(state, PackedInts.DEFAULT);
+ }
+
+ public Facet42DocValuesConsumer(SegmentWriteState state, float acceptableOverheadRatio) throws IOException {
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
+ boolean success = false;
+ try {
+ String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
+ out = state.directory.createOutput(fileName, state.context);
+ CodecUtil.writeHeader(out, Facet42DocValuesFormat.CODEC, Facet42DocValuesFormat.VERSION_CURRENT);
+ maxDoc = state.segmentInfo.getDocCount();
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(this);
+ }
+ }
+ }
+
+ @Override
+ public void addNumericField(FieldInfo field, Iterable values) throws IOException {
+ throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields");
+ }
+
+ @Override
+ public void addBinaryField(FieldInfo field, final Iterable values) throws IOException {
+ // write the byte[] data
+ out.writeVInt(field.number);
+
+ long totBytes = 0;
+ for (BytesRef v : values) {
+ totBytes += v.length;
+ }
+
+ if (totBytes > Integer.MAX_VALUE) {
+ throw new IllegalStateException("too many facets in one segment: Facet42DocValues cannot handle more than 2 GB facet data per segment");
+ }
+
+ out.writeVInt((int) totBytes);
+
+ for (BytesRef v : values) {
+ out.writeBytes(v.bytes, v.offset, v.length);
+ }
+
+ PackedInts.Writer w = PackedInts.getWriter(out, maxDoc+1, PackedInts.bitsRequired(totBytes+1), acceptableOverheadRatio);
+
+ int address = 0;
+ for(BytesRef v : values) {
+ w.add(address);
+ address += v.length;
+ }
+ w.add(address);
+ w.finish();
+ }
+
+ @Override
+ public void addSortedField(FieldInfo field, Iterable values, Iterable docToOrd) throws IOException {
+ throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields");
+ }
+
+ @Override
+ public void close() throws IOException {
+ boolean success = false;
+ try {
+ out.writeVInt(-1); // write EOF marker
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(out);
+ } else {
+ IOUtils.closeWhileHandlingException(out);
+ }
+ }
+ }
+
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesConsumer.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html (working copy)
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+Codec + DocValuesFormat that are optimized for facets.
+
+
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java (working copy)
@@ -0,0 +1,80 @@
+package org.apache.lucene.facet.codecs.facet42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+
+class Facet42DocValuesProducer extends DocValuesProducer {
+
+ private final Map fields = new HashMap();
+
+ Facet42DocValuesProducer(SegmentReadState state) throws IOException {
+ String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Facet42DocValuesFormat.EXTENSION);
+ IndexInput in = state.directory.openInput(fileName, state.context);
+ boolean success = false;
+ try {
+ CodecUtil.checkHeader(in, Facet42DocValuesFormat.CODEC,
+ Facet42DocValuesFormat.VERSION_START,
+ Facet42DocValuesFormat.VERSION_START);
+ int fieldNumber = in.readVInt();
+ while (fieldNumber != -1) {
+ fields.put(fieldNumber, new Facet42BinaryDocValues(in));
+ fieldNumber = in.readVInt();
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(in);
+ } else {
+ IOUtils.closeWhileHandlingException(in);
+ }
+ }
+ }
+
+ @Override
+ public NumericDocValues getNumeric(FieldInfo field) throws IOException {
+ throw new UnsupportedOperationException("FacetsDocValues only implements binary");
+ }
+
+ @Override
+ public BinaryDocValues getBinary(FieldInfo field) throws IOException {
+ return fields.get(field.number);
+ }
+
+ @Override
+ public SortedDocValues getSorted(FieldInfo field) throws IOException {
+ throw new UnsupportedOperationException("FacetsDocValues only implements binary");
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java (working copy)
@@ -0,0 +1,47 @@
+package org.apache.lucene.facet.codecs.facet42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
+
+class Facet42BinaryDocValues extends BinaryDocValues {
+
+ private final byte[] bytes;
+ private final PackedInts.Reader addresses;
+
+ Facet42BinaryDocValues(DataInput in) throws IOException {
+ int totBytes = in.readVInt();
+ bytes = new byte[totBytes];
+ in.readBytes(bytes, 0, totBytes);
+ addresses = PackedInts.getReader(in);
+ }
+
+ @Override
+ public void get(int docID, BytesRef ret) {
+ int start = (int) addresses.get(docID);
+ ret.bytes = bytes;
+ ret.offset = start;
+ ret.length = (int) (addresses.get(docID+1)-start);
+ }
+
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
===================================================================
--- lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (revision 0)
+++ lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (working copy)
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.lucene.facet.codecs.facet42.Facet42DocValuesFormat
Property changes on: lucene/facet/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/build.xml
===================================================================
--- lucene/facet/build.xml (revision 1445183)
+++ lucene/facet/build.xml (working copy)
@@ -31,7 +31,7 @@
-
+
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1445183)
+++ lucene/CHANGES.txt (working copy)
@@ -112,6 +112,10 @@
table-compression, etc), and memory addresses use MonotonicBlockPackedWriter.
(Simon Willnauer, Adrien Grand, Mike McCandless, Robert Muir)
+* LUCENE-4764: A new Facet42Codec and Facet42DocValuesFormat provide
+ faster but more RAM-consuming facet performance. (Shai Erera, Mike
+ McCandless)
+
New Features
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the