diff --git a/.gitignore b/.gitignore index 63a631a..a6970cc 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ /solr/example/example-DIH/**/data /solr/example/example-DIH/**/dataimport.properties /solr/example/example-DIH/solr/mail/lib/*.jar +/output diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java index 021bd4b..7fc1a20 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java @@ -28,6 +28,7 @@ import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; @@ -88,7 +89,7 @@ public class SimpleSortedSetFacetsExample { private List search() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(indexReader); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(indexReader); // Aggregatses the facet counts FacetsCollector fc = new FacetsCollector(); @@ -113,7 +114,7 @@ public class SimpleSortedSetFacetsExample { private FacetResult drillDown() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(indexReader); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(indexReader); // Now user drills down on Publish Year/2010: DrillDownQuery q = new DrillDownQuery(config); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java new file mode 100644 index 0000000..9665b28 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java @@ -0,0 +1,137 @@ +package org.apache.lucene.facet.sortedset; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Default implementation of {@link SortedSetDocValuesFacetCounts} + */ +public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState { + + private final String field; + private final AtomicReader topReader; + private final int valueCount; + + /** {@link IndexReader} passed to the constructor. */ + public final IndexReader origReader; + + private final Map prefixToOrdRange = new HashMap(); + + /** Creates this, pulling doc values from the default {@link + * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */ + public DefaultSortedSetDocValuesReaderState(IndexReader reader) throws IOException { + this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME); + } + + /** Creates this, pulling doc values from the specified + * field. */ + public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException { + this.field = field; + this.origReader = reader; + + // We need this to create thread-safe MultiSortedSetDV + // per collector: + topReader = SlowCompositeReaderWrapper.wrap(reader); + SortedSetDocValues dv = topReader.getSortedSetDocValues(field); + if (dv == null) { + throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues"); + } + if (dv.getValueCount() > Integer.MAX_VALUE) { + throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount()); + } + valueCount = (int) dv.getValueCount(); + + // TODO: we can make this more efficient if eg we can be + // "involved" when OrdinalMap is being created? Ie see + // each term/ord it's assigning as it goes... + String lastDim = null; + int startOrd = -1; + BytesRef spare = new BytesRef(); + + // TODO: this approach can work for full hierarchy?; + // TaxoReader can't do this since ords are not in + // "sorted order" ... but we should generalize this to + // support arbitrary hierarchy: + for(int ord=0;ord getPrefixToOrdRange() { + return prefixToOrdRange; + } + + /** Returns the {@link OrdRange} for this dimension. */ + @Override + public OrdRange getOrdRange(String dim) { + return prefixToOrdRange.get(dim); + } + + /** Indexed field we are reading. */ + @Override + public String getField() { + return field; + } + + @Override + public IndexReader getOrigReader() { + return origReader; + } + + /** Number of unique labels. */ + @Override + public int getSize() { + return valueCount; + } + +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java index a8fcfc6..b40d019 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java @@ -70,8 +70,8 @@ public class SortedSetDocValuesFacetCounts extends Facets { throws IOException { this.state = state; this.field = state.getField(); + dv = state.getDocValues(); counts = new int[state.getSize()]; - dv = state.getDocValues(); //System.out.println("field=" + field); count(hits.getMatchingDocs()); } @@ -157,6 +157,8 @@ public class SortedSetDocValuesFacetCounts extends Facets { } else { ordinalMap = null; } + + IndexReader origReader = state.getOrigReader(); for(MatchingDocs hits : matchingDocs) { @@ -166,7 +168,7 @@ public class SortedSetDocValuesFacetCounts extends Facets { // the top-level reader passed to the // SortedSetDocValuesReaderState, else cryptic // AIOOBE can happen: - if (ReaderUtil.getTopLevelContext(hits.context).reader() != state.origReader) { + if (ReaderUtil.getTopLevelContext(hits.context).reader() != origReader) { throw new IllegalStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader"); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java index 2518e6c..830999e 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesReaderState.java @@ -45,14 +45,7 @@ import org.apache.lucene.util.BytesRef; * so you should create it once and re-use that one instance * for a given {@link IndexReader}. */ -public final class SortedSetDocValuesReaderState { - - private final String field; - private final AtomicReader topReader; - private final int valueCount; - - /** {@link IndexReader} passed to the constructor. */ - public final IndexReader origReader; +public abstract class SortedSetDocValuesReaderState { /** Holds start/end range of ords, which maps to one * dimension (someday we may generalize it to map to @@ -69,87 +62,24 @@ public final class SortedSetDocValuesReaderState { this.end = end; } } - - private final Map prefixToOrdRange = new HashMap(); - - /** Creates this, pulling doc values from the default {@link - * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */ - public SortedSetDocValuesReaderState(IndexReader reader) throws IOException { - this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME); - } - - /** Creates this, pulling doc values from the specified - * field. */ - public SortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException { - - this.field = field; - this.origReader = reader; - - // We need this to create thread-safe MultiSortedSetDV - // per collector: - topReader = SlowCompositeReaderWrapper.wrap(reader); - SortedSetDocValues dv = topReader.getSortedSetDocValues(field); - if (dv == null) { - throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues"); - } - if (dv.getValueCount() > Integer.MAX_VALUE) { - throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount()); - } - valueCount = (int) dv.getValueCount(); - - // TODO: we can make this more efficient if eg we can be - // "involved" when OrdinalMap is being created? Ie see - // each term/ord it's assigning as it goes... - String lastDim = null; - int startOrd = -1; - BytesRef spare = new BytesRef(); - - // TODO: this approach can work for full hierarchy?; - // TaxoReader can't do this since ords are not in - // "sorted order" ... but we should generalize this to - // support arbitrary hierarchy: - for(int ord=0;ord getPrefixToOrdRange() { - return prefixToOrdRange; - } - - /** Returns the {@link OrdRange} for this dimension. */ - public OrdRange getOrdRange(String dim) { - return prefixToOrdRange.get(dim); - } - + public abstract SortedSetDocValues getDocValues() throws IOException; + /** Indexed field we are reading. */ - public String getField() { - return field; - } - + public abstract String getField(); + + /** Returns the {@link OrdRange} for this dimension. */ + public abstract OrdRange getOrdRange(String dim); + + /** Returns mapping from prefix to {@link OrdRange}. */ + public abstract Map getPrefixToOrdRange(); + + /** + * Returns top-level index reader + */ + public abstract IndexReader getOrigReader(); + /** Number of unique labels. */ - public int getSize() { - return valueCount; - } + public abstract int getSize(); } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java index f966f91..8bbfd3a 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java @@ -32,6 +32,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult; +import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -552,7 +553,7 @@ public class TestDrillSideways extends FacetTestCase { IndexSearcher s = newSearcher(r); if (doUseDV) { - sortedSetDVState = new SortedSetDocValuesReaderState(s.getIndexReader()); + sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.getIndexReader()); } else { sortedSetDVState = null; } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java index b3f08f2..bccd329 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java @@ -74,7 +74,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { IndexSearcher searcher = newSearcher(writer.getReader()); // Per-top-reader state: - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader()); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader()); FacetsCollector c = new FacetsCollector(); @@ -110,7 +110,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { writer.addDocument(config.build(doc)); IndexReader r = writer.getReader(); - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(r); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(r); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("a", "bar")); @@ -176,7 +176,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { writer.close(); // Per-top-reader state: - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader()); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader()); FacetsCollector c = new FacetsCollector(); searcher.search(new MatchAllDocsQuery(), c); @@ -221,7 +221,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { writer.close(); // Per-top-reader state: - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader()); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader()); FacetsCollector c = new FacetsCollector(); searcher.search(new MatchAllDocsQuery(), c); @@ -256,7 +256,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { IndexSearcher searcher = new IndexSearcher(SlowCompositeReaderWrapper.wrap(writer.getReader())); // Per-top-reader state: - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader()); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader()); FacetsCollector c = new FacetsCollector(); searcher.search(new MatchAllDocsQuery(), c); @@ -295,7 +295,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { IndexSearcher searcher = newSearcher(w.getReader()); // Per-top-reader state: - SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(searcher.getIndexReader()); + SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader()); int iters = atLeast(100); for(int iter=0;iter