diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java index 3cd7c15..6372c13 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java @@ -28,6 +28,7 @@ import org.apache.lucene.facet.search.CountFacetRequest; import org.apache.lucene.facet.search.DrillDownQuery; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetsCollector; +import org.apache.lucene.facet.sortedset.DefaultSortedSetDocsValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; @@ -83,7 +84,7 @@ public class SimpleSortedSetFacetsExample { private List search() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(indexReader); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocsValuesReaderState(indexReader); // Count both "Publish Year" and "Author" dimensions FacetSearchParams fsp = new FacetSearchParams( @@ -111,7 +112,7 @@ public class SimpleSortedSetFacetsExample { private List drillDown() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(indexReader); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocsValuesReaderState(indexReader); // Now user drills down on Publish Year/2010: FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10)); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocsValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocsValuesReaderState.java new file mode 100644 index 0000000..fd4db13 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocsValuesReaderState.java @@ -0,0 +1,130 @@ +package org.apache.lucene.facet.sortedset; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + +import org.apache.lucene.facet.params.FacetIndexingParams; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class DefaultSortedSetDocsValuesReaderState extends SortedSetDocValuesReaderState { + private final String field; + private final AtomicReader topReader; + private final int valueCount; + final IndexReader origReader; + final char separator; + final String separatorRegex; + + private final Map prefixToOrdRange = new HashMap(); + + /** Create an instance, scanning the {@link + * SortedSetDocValues} from the provided reader, with + * default {@link FacetIndexingParams}. */ + public DefaultSortedSetDocsValuesReaderState(IndexReader reader) throws IOException { + this(FacetIndexingParams.DEFAULT, reader); + } + + /** Create an instance, scanning the {@link + * SortedSetDocValues} from the provided reader and + * {@link FacetIndexingParams}. */ + public DefaultSortedSetDocsValuesReaderState(FacetIndexingParams fip, IndexReader reader) throws IOException { + + this.field = fip.getCategoryListParams(null).field + FACET_FIELD_EXTENSION; + this.separator = fip.getFacetDelimChar(); + this.separatorRegex = Pattern.quote(Character.toString(separator)); + this.origReader = reader; + + // We need this to create thread-safe MultiSortedSetDV + // per collector: + topReader = SlowCompositeReaderWrapper.wrap(reader); + SortedSetDocValues dv = topReader.getSortedSetDocValues(field); + if (dv == null) { + throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues"); + } + if (dv.getValueCount() > Integer.MAX_VALUE) { + throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount()); + } + valueCount = (int) dv.getValueCount(); + + // TODO: we can make this more efficient if eg we can be + // "involved" when OrdinalMap is being created? Ie see + // each term/ord it's assigning as it goes... + String lastDim = null; + int startOrd = -1; + BytesRef spare = new BytesRef(); + + // TODO: this approach can work for full hierarchy?; + // TaxoReader can't do this since ords are not in + // "sorted order" ... but we should generalize this to + // support arbitrary hierarchy: + for(int ord=0;ord Integer.MAX_VALUE) { + throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount()); + } + int valCount = (int) dv.getValueCount(); + this.facetArrays = arrays == null ? new FacetArrays(valCount) : arrays; + // Check params: for (FacetRequest fr : fsp.facetRequests) { @@ -130,7 +135,7 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { // the top-level reader passed to the // SortedSetDocValuesReaderState, else cryptic // AIOOBE can happen: - if (ReaderUtil.getTopLevelContext(matchingDocs.context).reader() != state.origReader) { + if (ReaderUtil.getTopLevelContext(matchingDocs.context).reader() != state.getIndexReader()) { throw new IllegalStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader"); } @@ -234,7 +239,7 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { dimCount += counts[ord]; FacetResultNode node = new FacetResultNode(ord, counts[ord]); dv.lookupOrd(ord, scratch); - node.label = new CategoryPath(scratch.utf8ToString().split(state.separatorRegex, 2)); + node.label = new CategoryPath(scratch.utf8ToString().split(state.getSeparatorRegex(), 2)); nodes.add(node); } } @@ -306,7 +311,7 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { for(int i=childNodes.length-1;i>=0;i--) { childNodes[i] = q.pop(); dv.lookupOrd(childNodes[i].ordinal, scratch); - childNodes[i].label = new CategoryPath(scratch.utf8ToString().split(state.separatorRegex, 2)); + childNodes[i].label = new CategoryPath(scratch.utf8ToString().split(state.getSeparatorRegex(), 2)); } rootNode.subResults = Arrays.asList(childNodes); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java index b890353..915bed4 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java @@ -17,18 +17,10 @@ package org.apache.lucene.facet.sortedset; * limitations under the License. */ import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Pattern; import org.apache.lucene.facet.params.CategoryListParams; -import org.apache.lucene.facet.params.FacetIndexingParams; -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.CompositeReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.util.BytesRef; /** Wraps a {@link IndexReader} and resolves ords * using existing {@link SortedSetDocValues} APIs without a @@ -46,15 +38,7 @@ import org.apache.lucene.util.BytesRef; * so you should create it once and re-use that one instance * for a given {@link IndexReader}. */ -public final class SortedSetDocValuesReaderState { - - private final String field; - private final AtomicReader topReader; - private final int valueCount; - final IndexReader origReader; - final char separator; - final String separatorRegex; - +public abstract class SortedSetDocValuesReaderState { /** Extension added to {@link CategoryListParams#field} * to determin which field to read/write facet ordinals from/to. */ public static final String FACET_FIELD_EXTENSION = "_sorted_doc_values"; @@ -74,82 +58,14 @@ public final class SortedSetDocValuesReaderState { this.end = end; } } + + public abstract IndexReader getIndexReader(); + + public abstract String getSeparatorRegex(); - private final Map prefixToOrdRange = new HashMap(); - - /** Create an instance, scanning the {@link - * SortedSetDocValues} from the provided reader, with - * default {@link FacetIndexingParams}. */ - public SortedSetDocValuesReaderState(IndexReader reader) throws IOException { - this(FacetIndexingParams.DEFAULT, reader); - } - - /** Create an instance, scanning the {@link - * SortedSetDocValues} from the provided reader and - * {@link FacetIndexingParams}. */ - public SortedSetDocValuesReaderState(FacetIndexingParams fip, IndexReader reader) throws IOException { - - this.field = fip.getCategoryListParams(null).field + FACET_FIELD_EXTENSION; - this.separator = fip.getFacetDelimChar(); - this.separatorRegex = Pattern.quote(Character.toString(separator)); - this.origReader = reader; + public abstract SortedSetDocValues getDocValues() throws IOException; - // We need this to create thread-safe MultiSortedSetDV - // per collector: - topReader = SlowCompositeReaderWrapper.wrap(reader); - SortedSetDocValues dv = topReader.getSortedSetDocValues(field); - if (dv == null) { - throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues"); - } - if (dv.getValueCount() > Integer.MAX_VALUE) { - throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount()); - } - valueCount = (int) dv.getValueCount(); - - // TODO: we can make this more efficient if eg we can be - // "involved" when OrdinalMap is being created? Ie see - // each term/ord it's assigning as it goes... - String lastDim = null; - int startOrd = -1; - BytesRef spare = new BytesRef(); - - // TODO: this approach can work for full hierarchy?; - // TaxoReader can't do this since ords are not in - // "sorted order" ... but we should generalize this to - // support arbitrary hierarchy: - for(int ord=0;ord rangeRequest = new RangeFacetRequest("field", diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java index 38cc398..cd31502 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java @@ -37,6 +37,7 @@ import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult; +import org.apache.lucene.facet.sortedset.DefaultSortedSetDocsValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.CategoryPath; @@ -600,7 +601,7 @@ public class TestDrillSideways extends FacetTestCase { final SortedSetDocValuesReaderState sortedSetDVState; IndexSearcher s = newSearcher(r); if (doUseDV) { - sortedSetDVState = new SortedSetDocValuesReaderState(s.getIndexReader()); + sortedSetDVState = new DefaultSortedSetDocsValuesReaderState(s.getIndexReader()); } else { sortedSetDVState = null; } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java index ee2c9c3..732dfaf 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java @@ -27,8 +27,8 @@ import org.apache.lucene.facet.sampling.SamplingParams; import org.apache.lucene.facet.sampling.SamplingWrapper; import org.apache.lucene.facet.sampling.TakmiSampleFixer; import org.apache.lucene.facet.search.FacetRequest.ResultMode; +import org.apache.lucene.facet.sortedset.DefaultSortedSetDocsValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; @@ -422,7 +422,7 @@ public class TestFacetsCollector extends FacetTestCase { try { // SortedSetDocValuesAccumulator cannot even be created in such state - assertNull(new SortedSetDocValuesAccumulator(new SortedSetDocValuesReaderState(indexReader), fsp)); + assertNull(new SortedSetDocValuesAccumulator(new DefaultSortedSetDocsValuesReaderState(indexReader), fsp)); // if this ever changes, make sure FacetResultNode is labeled correctly fail("should not have succeeded to execute a request over a category which wasn't indexed as SortedSetDVField"); } catch (IllegalArgumentException e) { diff --git a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java index 008a007..a9d72b6 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java @@ -107,7 +107,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { FacetSearchParams fsp = new FacetSearchParams(new FacetIndexingParams(clp), requests); // Per-top-reader state: - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(fip, searcher.getIndexReader()); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocsValuesReaderState(fip, searcher.getIndexReader()); //SortedSetDocValuesCollector c = new SortedSetDocValuesCollector(state); //SortedSetDocValuesCollectorMergeBySeg c = new SortedSetDocValuesCollectorMergeBySeg(state); @@ -160,7 +160,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { writer.addDocument(doc); IndexReader r = writer.getReader(); - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(r); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocsValuesReaderState(r); doc = new Document(); dvFields.addFields(doc, Collections.singletonList(new CategoryPath("a", "bar")));