Index: lucene/facet/build.xml
===================================================================
--- lucene/facet/build.xml (revision 1444349)
+++ lucene/facet/build.xml (working copy)
@@ -31,7 +31,7 @@
-
+
Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (revision 1444349)
+++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (working copy)
@@ -18,10 +18,11 @@
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.collections.IntToObjectMap;
import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.index.Facets42Codec;
+import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
-import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
@@ -33,8 +34,8 @@
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -180,7 +181,8 @@
/** Returns indexing params for the main index */
protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
- return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+ // nocommit
+ return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(new Facets42Codec());
}
/** Returns a {@link FacetIndexingParams} per the given partition size. */
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesProducer.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesProducer.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesProducer.java (working copy)
@@ -0,0 +1,109 @@
+package org.apache.lucene.facet.codecs.facetsdv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.BlockPackedReader;
+import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
+
+/**
+ * Reader for {@link FacetsDocValuesFormat}. Every field in the segment's file is
+ * loaded into memory by the constructor, which also closes the file, so
+ * {@link #close()} has nothing left to release. Only binary doc values are
+ * supported.
+ */
+class FacetsDocValuesProducer extends DocValuesProducer {
+
+  /** In-memory doc values of each field read from the file, keyed by field number. */
+  private final Map<Integer,FacetsBinaryDocValues> fields = new HashMap<Integer,FacetsBinaryDocValues>();
+
+  /**
+   * Opens the segment's facets doc-values file, checks its header and reads
+   * fields until the {@code -1} end marker is reached.
+   *
+   * @param state identifies the segment, directory and IO context to read from
+   * @throws IOException if opening, validating or reading the file fails
+   */
+  FacetsDocValuesProducer(SegmentReadState state) throws IOException {
+    String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FacetsDocValuesFormat.EXTENSION);
+    IndexInput in = state.directory.openInput(fileName, state.context);
+    boolean success = false;
+    try {
+      CodecUtil.checkHeader(in, FacetsDocValuesFormat.CODEC,
+          FacetsDocValuesFormat.VERSION_START,
+          FacetsDocValuesFormat.VERSION_START);
+      int fieldNumber = in.readVInt();
+      while (fieldNumber != -1) {
+        fields.put(fieldNumber, new FacetsBinaryDocValues(in));
+        fieldNumber = in.readVInt();
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(in);
+      } else {
+        // an exception is already propagating; do not let a close failure mask it
+        IOUtils.closeWhileHandlingException(in);
+      }
+    }
+  }
+
+  /** Not supported: always throws {@link UnsupportedOperationException}. */
+  @Override
+  public NumericDocValues getNumeric(FieldInfo field) throws IOException {
+    throw new UnsupportedOperationException("FacetsDocValues only implements binary");
+  }
+
+  /**
+   * Returns the values loaded for the given field.
+   *
+   * @param field the field to look up, by {@code field.number}
+   * @return the field's values, or {@code null} if the file had no entry for it
+   */
+  @Override
+  public BinaryDocValues getBinary(FieldInfo field) throws IOException {
+    return fields.get(field.number);
+  }
+
+  /** Not supported: always throws {@link UnsupportedOperationException}. */
+  @Override
+  public SortedDocValues getSorted(FieldInfo field) throws IOException {
+    throw new UnsupportedOperationException("FacetsDocValues only implements binary");
+  }
+
+  /** Nothing to do: the input was already closed by the constructor. */
+  @Override
+  public void close() throws IOException {
+  }
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesProducer.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsBinaryDocValues.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsBinaryDocValues.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsBinaryDocValues.java (working copy)
@@ -0,0 +1,64 @@
+package org.apache.lucene.facet.codecs.facetsdv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
+
+/**
+ * Binary doc values held entirely in memory: the values of all documents are
+ * concatenated in {@link #bytes} and {@link #addresses} records where each
+ * document's slice starts.
+ */
+public class FacetsBinaryDocValues extends BinaryDocValues {
+  /** Concatenated values of all documents. */
+  public final byte[] bytes;
+  /** Entry {@code d} is the start offset in {@link #bytes} of document {@code d}'s value; entry {@code d+1} is its end. */
+  public final PackedInts.Reader addresses;
+
+  /**
+   * Reads one field: a VInt byte count, that many bytes, then the packed addresses.
+   *
+   * @param in input positioned at the start of the field's data
+   * @throws IOException if reading fails
+   */
+  FacetsBinaryDocValues(DataInput in) throws IOException {
+    int totBytes = in.readVInt();
+    bytes = new byte[totBytes];
+    in.readBytes(bytes, 0, totBytes);
+    addresses = PackedInts.getReader(in);
+  }
+
+  /**
+   * Points {@code ret} at the given document's slice of the shared {@link #bytes}
+   * array; nothing is copied, so callers must not modify the referenced bytes.
+   *
+   * @param docID document whose value is requested
+   * @param ret receives the array, offset and length of the value
+   */
+  @Override
+  public void get(int docID, BytesRef ret) {
+    int start = (int) addresses.get(docID);
+    ret.bytes = bytes;
+    ret.offset = start;
+    ret.length = (int) (addresses.get(docID+1)-start);
+  }
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsBinaryDocValues.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesFormat.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesFormat.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesFormat.java (working copy)
@@ -0,0 +1,63 @@
+package org.apache.lucene.facet.codecs.facetsdv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+// nocommit rename to Facets42DVFormat
+
+/**
+ * A {@link DocValuesFormat} tailored to facets: it supports binary doc values
+ * only and trades extra RAM for faster lookups.
+ *
+ * @lucene.experimental
+ */
+public final class FacetsDocValuesFormat extends DocValuesFormat {
+
+  /** Codec name written to, and checked in, the file header. */
+  public static final String CODEC = "FacetsDocValues";
+  /** Extension of the per-segment file holding the doc values. */
+  public static final String EXTENSION = "fdv";
+  /** First version of the file format. */
+  public static final int VERSION_START = 0;
+  /** Version written by the current code. */
+  public static final int VERSION_CURRENT = VERSION_START;
+
+  /** Creates the format under the name {@code "Facets"}. */
+  public FacetsDocValuesFormat() {
+    super("Facets");
+  }
+
+  /** Returns a new {@link FacetsDocValuesConsumer} for the segment being written. */
+  @Override
+  public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    return new FacetsDocValuesConsumer(state);
+  }
+
+  /** Returns a new {@link FacetsDocValuesProducer} for the segment being read. */
+  @Override
+  public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
+    return new FacetsDocValuesProducer(state);
+  }
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesFormat.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesConsumer.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesConsumer.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesConsumer.java (working copy)
@@ -0,0 +1,146 @@
+package org.apache.lucene.facet.codecs.facetsdv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
+import org.apache.lucene.util.packed.PackedInts;
+
+/**
+ * Writer for {@link FacetsDocValuesFormat}. Each binary field is written as its
+ * field number, the total byte count, the concatenated values of all documents
+ * and a packed array of per-document start offsets; {@link #close()} appends a
+ * {@code -1} end marker. Numeric and sorted fields are not supported.
+ */
+public class FacetsDocValuesConsumer extends DocValuesConsumer {
+
+  /** Output of the segment's facets doc-values file. */
+  final IndexOutput out;
+  /** Document count of the segment being written, taken from its segment info. */
+  final int maxDoc;
+
+  /**
+   * Creates the segment's output file and writes the codec header.
+   *
+   * @param state identifies the segment, directory and IO context to write to
+   * @throws IOException if the file cannot be created or the header cannot be written
+   */
+  public FacetsDocValuesConsumer(SegmentWriteState state) throws IOException {
+    boolean success = false;
+    try {
+      String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FacetsDocValuesFormat.EXTENSION);
+      out = state.directory.createOutput(fileName, state.context);
+      CodecUtil.writeHeader(out, FacetsDocValuesFormat.CODEC, FacetsDocValuesFormat.VERSION_CURRENT);
+      maxDoc = state.segmentInfo.getDocCount();
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this);
+      }
+    }
+  }
+
+  /** Not supported: always throws {@link UnsupportedOperationException}. */
+  @Override
+  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+    throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields");
+  }
+
+  /**
+   * Writes one binary field. {@code values} is iterated three times: to total the
+   * byte count, to write the bytes and to write each value's start offset.
+   *
+   * @param field the field being written; its number is recorded in the file
+   * @param values the field's values
+   *        (NOTE(review): presumably one per document, since the address table
+   *        is sized from maxDoc - confirm against the caller)
+   * @throws IllegalStateException if the values total more than {@link Integer#MAX_VALUE} bytes
+   * @throws IOException if writing fails
+   */
+  @Override
+  public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
+    long totBytes = 0;
+    for (BytesRef v : values) {
+      totBytes += v.length;
+    }
+
+    // validate before anything is written for this field
+    if (totBytes > Integer.MAX_VALUE) {
+      // nocommit fixme:
+      throw new IllegalStateException("field \"" + field.name + "\" has " + totBytes + " bytes of binary doc values; at most " + Integer.MAX_VALUE + " are supported");
+    }
+
+    // write the byte[] data
+    out.writeVInt(field.number);
+    out.writeVInt((int) totBytes);
+
+    // nocommit treat int[] ords as long int[] concatenated
+    // from all docs?
+    for (BytesRef v : values) {
+      out.writeBytes(v.bytes, v.offset, v.length);
+    }
+
+    // nocommit make FASTEST controllable
+    // maxDoc+1 entries: one start offset per document plus the final end offset
+    PackedInts.Writer w = PackedInts.getWriter(out, maxDoc+1, PackedInts.bitsRequired(totBytes+1), PackedInts.FASTEST);
+
+    int address = 0;
+    for (BytesRef v : values) {
+      w.add(address);
+      address += v.length;
+    }
+    w.add(address);
+    w.finish();
+  }
+
+  /** Not supported: always throws {@link UnsupportedOperationException}. */
+  @Override
+  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+    throw new UnsupportedOperationException("FacetsDocValues can only handle binary fields");
+  }
+
+  /**
+   * Writes the {@code -1} end marker and closes the output.
+   *
+   * @throws IOException if writing the marker or closing the output fails
+   */
+  @Override
+  public void close() throws IOException {
+    boolean success = false;
+    try {
+      out.writeVInt(-1); // write EOF marker
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(out);
+      } else {
+        IOUtils.closeWhileHandlingException(out);
+      }
+    }
+  }
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/FacetsDocValuesConsumer.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/package.html
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/package.html (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/package.html (working copy)
@@ -0,0 +1,25 @@
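+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+DocValuesFormat that's optimized for facets.
+</body>
+</html>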
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facetsdv/package.html
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (revision 1444349)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (working copy)
@@ -2,6 +2,7 @@
import java.io.IOException;
+import org.apache.lucene.facet.codecs.facetsdv.FacetsBinaryDocValues;
import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder;
import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
import org.apache.lucene.facet.params.CategoryListParams;
@@ -10,6 +11,7 @@
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -68,20 +70,21 @@
if (dv == null) { // this reader does not have DocValues for the requested category list
return;
}
-
+
final int length = matchingDocs.bits.length();
final int[] counts = facetArrays.getIntArray();
- int doc = 0;
- while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
- dv.get(doc, buf);
- if (buf.length > 0) {
- // this document has facets
- final int upto = buf.offset + buf.length;
+
+ if (dv instanceof FacetsBinaryDocValues) {
+ final byte[] bytes = ((FacetsBinaryDocValues) dv).bytes;
+ final PackedInts.Reader addresses = ((FacetsBinaryDocValues) dv).addresses;
+ int doc = 0;
+ while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
+ int offset = (int) addresses.get(doc);
+ final int end = (int) addresses.get(1+doc);
int ord = 0;
- int offset = buf.offset;
int prev = 0;
- while (offset < upto) {
- byte b = buf.bytes[offset++];
+ while (offset < end) {
+ byte b = bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
++counts[ord];
@@ -90,8 +93,32 @@
ord = (ord << 7) | (b & 0x7F);
}
}
+ ++doc;
}
- ++doc;
+ } else {
+
+ int doc = 0;
+ while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
+ dv.get(doc, buf);
+ if (buf.length > 0) {
+ // this document has facets
+ final int upto = buf.offset + buf.length;
+ int ord = 0;
+ int offset = buf.offset;
+ int prev = 0;
+ while (offset < upto) {
+ byte b = buf.bytes[offset++];
+ if (b >= 0) {
+ prev = ord = ((ord << 7) | b) + prev;
+ ++counts[ord];
+ ord = 0;
+ } else {
+ ord = (ord << 7) | (b & 0x7F);
+ }
+ }
+ }
+ ++doc;
+ }
}
}
Index: lucene/facet/src/java/org/apache/lucene/facet/index/Facets42Codec.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/index/Facets42Codec.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/index/Facets42Codec.java (working copy)
@@ -0,0 +1,50 @@
+package org.apache.lucene.facet.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+
+/**
+ * Same as {@link Lucene42Codec} except it uses
+ * {@link org.apache.lucene.facet.codecs.facetsdv.FacetsDocValuesFormat} for facet
+ * fields (faster-but-more-RAM-consuming doc values).
+ */
+public class Facets42Codec extends Lucene42Codec {
+  // NOTE(review): forName resolves formats by name, so "Facets" must be registered
+  // with DocValuesFormat's lookup; confirm that registration exists.
+  private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName("Facets");
+  private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42");
+
+  /**
+   * Chooses the doc values format for a field.
+   *
+   * @param field name of the field whose format is requested
+   * @return the facets format for {@code "$facets"} and {@code "$all"}, the
+   *         Lucene42 format for every other field
+   */
+  @Override
+  public DocValuesFormat getDocValuesFormatForField(String field) {
+    // nocommit what about multiple/custom CLPs?
+    if (field.equals("$facets") || field.equals("$all")) {
+      return facetsDVFormat;
+    } else {
+      return lucene42DVFormat;
+    }
+  }
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/index/Facets42Codec.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property