Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java	(revision 0)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java	(working copy)
@@ -0,0 +1,101 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.PackedLongDocValuesField;
+import org.apache.lucene.facet.index.CategoryDocumentBuilder;
+import org.apache.lucene.facet.search.params.CountFacetRequest;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiCollector;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestDemoFacets extends LuceneTestCase {
+
+  private DirectoryTaxonomyWriter taxoWriter;
+  private RandomIndexWriter writer;
+
+  private void add(String... categoryPaths) throws IOException {
+    Document doc = new Document();
+    CategoryDocumentBuilder docBuilder = new CategoryDocumentBuilder(taxoWriter);
+    List<CategoryPath> paths = new ArrayList<CategoryPath>();
+    for(String categoryPath : categoryPaths) {
+      CategoryPath cp = new CategoryPath(categoryPath, '/');
+      paths.add(cp);
+      int ord = taxoWriter.addCategory(cp);
+      doc.add(new PackedLongDocValuesField(cp.getComponent(0), ord));
+    }
+    docBuilder.setCategoryPaths(paths);
+    docBuilder.build(doc);
+    writer.addDocument(doc);
+  }
+
+  public void test() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    writer = new RandomIndexWriter(random(), dir);
+    taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    add("Author/Bob", "Publish Date/2010/10/15");
+    add("Author/Lisa", "Publish Date/2010/10/20");
+    add("Author/Lisa", "Publish Date/2012/1/1");
+    add("Author/Susan", "Publish Date/2012/1/7");
+    add("Author/Frank", "Publish Date/1999/5/5");
+
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+
+    FacetSearchParams fsp = new FacetSearchParams();
+    fsp.addFacetRequest(new CountFacetRequest(new CategoryPath("Publish Date"), 10));
+    fsp.addFacetRequest(new CountFacetRequest(new CategoryPath("Author"), 10));
+    FacetsCollector c = new FacetsCollector(fsp, searcher.getIndexReader(), taxoReader);
+    CountingFacetsCollector c2 = new CountingFacetsCollector(fsp, taxoReader);
+    CachedCountingFacetsCollector c3 = new CachedCountingFacetsCollector(fsp, taxoReader);
+    DocValuesCountingFacetsCollector c4 = new DocValuesCountingFacetsCollector(fsp, taxoReader, "Publish Date", "Author");
+    searcher.search(new MatchAllDocsQuery(), MultiCollector.wrap(c, c2, c3, c4));
+    List<FacetResult> results1 = c.getFacetResults();
+    List<FacetResult> results2 = c2.getFacetResults();
+    List<FacetResult> results3 = c3.getFacetResults();
+    List<FacetResult> results4 = c4.getFacetResults();
+    assertEquals(results1.toString(), results2.toString());
+    assertEquals(results1.toString(), results3.toString());
+    assertEquals(results1.toString(), results4.toString());
+
+    taxoReader.close();
+    searcher.getIndexReader().close();
+    dir.close();
+    taxoDir.close();
+  }
+}

Property changes on: lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
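A note on the test above: with the five documents indexed, the expected top-level counts are Author (5): Lisa=2, Bob=1, Susan=1, Frank=1, and Publish Date (5): 2010=2, 2012=2, 1999=1. The test only verifies that the four collectors agree with each other via toString(); a stronger check would pin the absolute counts against the known data. A minimal sketch of such an assertion follows -- it assumes the FacetResult.getFacetResultNode() / FacetResultNode.getValue() / getSubResults() accessors of this era, so treat it as illustrative, not as part of the patch:

  // Hypothetical helper, not in the patch: verify absolute counts for the
  // "Author" dimension instead of only cross-collector agreement.
  private void assertAuthorCounts(List<FacetResult> results) {
    for(FacetResult result : results) {
      FacetResultNode root = result.getFacetResultNode();
      if (root.getLabel().toString('/').equals("Author")) {
        // Five docs each contribute exactly one Author category:
        assertEquals(5.0, root.getValue(), 0.0);
        for(FacetResultNode child : root.getSubResults()) {
          // "Author/Lisa" was added twice, so its count must be 2:
          if (child.getLabel().toString('/').equals("Author/Lisa")) {
            assertEquals(2.0, child.getValue(), 0.0);
          }
        }
      }
    }
  }
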
Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java	(revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java	(working copy)
@@ -0,0 +1,158 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
+
+// nocommit rename?
+// nocommit document limitations & detect when they are
+// violated (eg, only one partition, must be vInt(dgap),
+// only one category list)
+
+public class CountingFacetsCollector extends Collector {
+  private final FacetSearchParams fsp;
+  private final TaxonomyReader taxoReader;
+  private DocsAndPositionsEnum facetsEnum;
+  private BytesRef payload;
+  private final FacetArrays facetArrays;
+  // TODO: maybe sometimes int[] hash...?  maybe start as
+  // int[] hash but then grow into full array?
+  private final int[] counts;
+  private CategoryListIterator cli;
+
+  public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
+    this.fsp = fsp;
+    this.taxoReader = taxoReader;
+    // must pull array from here (instead of just a new
+    // int[]) in order to use later API to create
+    // FacetResults:
+
+    // nocommit should not pass null -- need to pass CP of each FacetRequest?
+    facetArrays = new FacetArrays(new IntArrayAllocator(taxoReader.getSize(), 1), null);
+    counts = facetArrays.getIntArray();
+  }
+
+  @Override
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    /*
+    Terms terms = context.reader().fields().terms(CategoryListParams.DEFAULT_TERM.field());
+    if (terms == null) {
+      return;
+    }
+    TermsEnum termsEnum = terms.iterator(null);
+    if (!termsEnum.seekExact(CategoryListParams.DEFAULT_TERM.bytes(), false)) {
+      return;
+    }
+
+    // Safe to pass null liveDocs I think?  Main query won't
+    // call us on deleted docs...
+    facetsEnum = termsEnum.docsAndPositions(null,
+                                            facetsEnum,
+                                            DocsAndPositionsEnum.FLAG_PAYLOADS);
+    */
+    cli = fsp.getFacetIndexingParams().getCategoryListParams(null).createCategoryListIterator(context.reader(), 0);
+    cli.init();
+  }
+
+  @Override
+  public void collect(int doc) throws IOException {
+    /*
+    if (facetsEnum != null) {
+      int curDoc = facetsEnum.docID();
+      if (curDoc > doc) {
+        // This document indexed no facets
+        return;
+      } else if (curDoc < doc) {
+        curDoc = facetsEnum.advance(doc);
+      }
+
+      assert facetsEnum.freq() == 1;
+
+      if (curDoc == doc) {
+        facetsEnum.nextPosition();
+        payload = facetsEnum.getPayload();
+        assert payload != null;
+        int upto = payload.offset;
+        final int end = payload.offset + payload.length;
+        int ord = 0;
+        int accum = 0;
+        final byte[] bytes = payload.bytes;
+        while(upto < end) {
+          byte b = bytes[upto++];
+          accum = (accum << 7) | (b & 0x7F);
+          if (b >= 0) {
+            ord += accum;
+            counts[ord]++;
+            accum = 0;
+          }
+        }
+        assert accum == 0;
+      }
+    }
+    */
+    if (cli.skipTo(doc)) {
+      long cat;
+      while((cat = cli.nextCategory()) < Integer.MAX_VALUE) {
+        counts[(int) cat]++;
+      }
+    }
+  }
+
+  public synchronized List<FacetResult> getFacetResults() throws IOException {
+    List<FacetResult> res = new ArrayList<FacetResult>();
+    for(FacetRequest fr : fsp.getFacetRequests()) {
+      FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxoReader);
+      IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(facetArrays, 0);
+      FacetResult facetRes = frHndlr.renderFacetResult(res4fr);
+      frHndlr.labelResult(facetRes);
+      res.add(facetRes);
+    }
+
+    return res;
+  }
+
+  public int[] getRawCounts() {
+    return counts;
+  }
+
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    // We must advance the facetsEnum in docID order:
+    return false;
+  }
+
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+  }
+}

Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
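The commented-out collect() above, and CachedCountingFacetsCollector below, both inline the same decode loop for the facets payload: the category ordinals of a document are sorted, delta-encoded ("dgap"), and written with the facet module's VInt8 variant (most-significant bytes first, high bit set on every byte except the last). A standalone version of that decode, against a plain byte[] -- the method name and signature here are illustrative, not from the patch:

  // Decodes dgap+VInt8-encoded ordinals from bytes[start..end) and
  // increments counts[ord] once per decoded ordinal.
  static void countOrds(byte[] bytes, int start, int end, int[] counts) {
    int ord = 0;    // running ordinal: the gaps accumulate into it
    int accum = 0;  // the VInt8 value currently being assembled
    for(int upto = start; upto < end; upto++) {
      byte b = bytes[upto];
      accum = (accum << 7) | (b & 0x7F);
      if (b >= 0) {     // high bit clear: this was the last byte of the VInt8
        ord += accum;   // undo the delta-encoding
        counts[ord]++;
        accum = 0;
      }
    }
  }

For example, the ordinal list [3, 7, 8] is stored as the gaps [3, 4, 1], one VInt8 each; since every value fits in 7 bits, that is just the three bytes {3, 4, 1}.
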
Index: lucene/facet/src/java/org/apache/lucene/facet/search/CachedCountingFacetsCollector.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/CachedCountingFacetsCollector.java	(revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/CachedCountingFacetsCollector.java	(working copy)
@@ -0,0 +1,178 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.PackedInts;
+
+// nocommit rename?
+// nocommit document limitations & detect when they are
+// violated (eg, only one partition, must be vInt(dgap))
+
+public class CachedCountingFacetsCollector extends Collector {
+  private final FacetSearchParams fsp;
+  private final TaxonomyReader taxoReader;
+  private CachedBytes readerCache;
+  private final FacetArrays facetArrays;
+  // TODO: maybe sometimes int[] hash...?  maybe start as
+  // int[] hash but then grow into full array?
+  private final int[] counts;
+
+  // nocommit hacky!  should just use DocValues ...
+  private static final Map<AtomicReader,CachedBytes> cache = new IdentityHashMap<AtomicReader,CachedBytes>();
+
+  private static class CachedBytes {
+    public final PagedBytes.Reader bytes;
+    public final PackedInts.Reader docToOffset;
+
+    public CachedBytes(AtomicReader reader) throws IOException {
+      long t0 = System.currentTimeMillis();
+      Terms terms = reader.fields().terms(CategoryListParams.DEFAULT_TERM.field());
+      if (terms != null) {
+        TermsEnum termsEnum = terms.iterator(null);
+        if (termsEnum.seekExact(CategoryListParams.DEFAULT_TERM.bytes(), false)) {
+          PagedBytes payloads = new PagedBytes(15);
+
+          // pointer==0 means not set
+          payloads.copyUsingLengthPrefix(new BytesRef());
+
+          final GrowableWriter address = new GrowableWriter(10, reader.maxDoc(), 0.5f);
+
+          // Safe to pass null liveDocs I think?  Main query won't
+          // call us on deleted docs...
+          DocsAndPositionsEnum facetsEnum = termsEnum.docsAndPositions(reader.getLiveDocs(),
+                                                                       null,
+                                                                       DocsAndPositionsEnum.FLAG_PAYLOADS);
+          int docID;
+          while((docID = facetsEnum.nextDoc()) != DocsAndPositionsEnum.NO_MORE_DOCS) {
+            assert facetsEnum.freq() == 1;
+            facetsEnum.nextPosition();
+            BytesRef payload = facetsEnum.getPayload();
+            assert payload != null;
+            address.set(docID, payloads.copyUsingLengthPrefix(payload));
+          }
+
+          bytes = payloads.freeze(true);
+          docToOffset = address.getMutable();
+          long t1 = System.currentTimeMillis();
+          System.out.println((t1-t0) + " msec to init reader=" + reader + "; cache size=" + RamUsageEstimator.sizeOf(cache));
+          return;
+        }
+      }
+
+      bytes = null;
+      docToOffset = null;
+    }
+  }
+
+  public CachedCountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
+    this.fsp = fsp;
+    this.taxoReader = taxoReader;
+    // must pull array from here (instead of just a new
+    // int[]) in order to use later API to create
+    // FacetResults:
+    facetArrays = new FacetArrays(new IntArrayAllocator(taxoReader.getSize(), 1), null);
+    counts = facetArrays.getIntArray();
+  }
+
+  @Override
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    synchronized(cache) {
+      if (!cache.containsKey(context.reader())) {
+        cache.put(context.reader(), new CachedBytes(context.reader()));
+      }
+      readerCache = cache.get(context.reader());
+    }
+  }
+
+  private final BytesRef scratch = new BytesRef();
+
+  @Override
+  public void collect(int doc) throws IOException {
+    if (readerCache.docToOffset != null) {
+      final int pointer = (int) readerCache.docToOffset.get(doc);
+      if (pointer != 0) {
+        readerCache.bytes.fill(scratch, pointer);
+        int upto = scratch.offset;
+        final int end = scratch.offset + scratch.length;
+        int ord = 0;
+        int accum = 0;
+        final byte[] bytes = scratch.bytes;
+        while(upto < end) {
+          byte b = bytes[upto++];
+          accum = (accum << 7) | (b & 0x7F);
+          if (b >= 0) {
+            ord += accum;
+            counts[ord]++;
+            accum = 0;
+          }
+        }
+        assert accum == 0;
+      }
+    }
+  }
+
+  public synchronized List<FacetResult> getFacetResults() throws IOException {
+    List<FacetResult> res = new ArrayList<FacetResult>();
+    for(FacetRequest fr : fsp.getFacetRequests()) {
+      FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxoReader);
+      IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(facetArrays, 0);
+      FacetResult facetRes = frHndlr.renderFacetResult(res4fr);
+      frHndlr.labelResult(facetRes);
+      res.add(facetRes);
+    }
+
+    return res;
+  }
+
+  public int[] getRawCounts() {
+    return counts;
+  }
+
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    // We must advance the facetsEnum in docID order:
+    return false;
+  }
+
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+  }
}

Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/CachedCountingFacetsCollector.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
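One caveat on the CachedBytes approach above: the static IdentityHashMap never evicts, so every segment reader that passes through setNextReader stays pinned along with its payload bytes -- presumably part of why the author marked it "nocommit hacky! should just use DocValues". If this prototype lived on, hooking the cache into IndexReader.addReaderClosedListener (which exists on 4.x IndexReader) would let entries die with their readers. The wiring below is my sketch, not the patch's:

  // Hypothetical eviction wiring inside setNextReader:
  final AtomicReader reader = context.reader();
  synchronized(cache) {
    if (!cache.containsKey(reader)) {
      cache.put(reader, new CachedBytes(reader));
      // Drop the cached payload bytes when this segment reader is closed:
      reader.addReaderClosedListener(new IndexReader.ReaderClosedListener() {
        @Override
        public void onClose(IndexReader closed) {
          synchronized(cache) {
            cache.remove(closed);
          }
        }
      });
    }
    readerCache = cache.get(reader);
  }
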
Index: lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCountingFacetsCollector.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCountingFacetsCollector.java	(revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCountingFacetsCollector.java	(working copy)
@@ -0,0 +1,133 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.PackedInts;
+
+// nocommit rename?
+// nocommit document limitations & detect when they are
+// violated (eg, only one partition, must be vInt(dgap))
+
+public class DocValuesCountingFacetsCollector extends Collector {
+  private final FacetSearchParams fsp;
+  private final TaxonomyReader taxoReader;
+  private final FacetArrays facetArrays;
+  // TODO: maybe sometimes int[] hash...?  maybe start as
+  // int[] hash but then grow into full array?
+  private final int[] counts;
+  private DocValues.Source[] readerValues;
+  private final String[] facetFields;
+
+  public DocValuesCountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader, String... facetFields) {
+    this.fsp = fsp;
+    this.taxoReader = taxoReader;
+    // must pull array from here (instead of just a new
+    // int[]) in order to use later API to create
+    // FacetResults:
+    facetArrays = new FacetArrays(new IntArrayAllocator(taxoReader.getSize(), 1), null);
+    counts = facetArrays.getIntArray();
+    readerValues = new DocValues.Source[facetFields.length];
+    this.facetFields = facetFields;
+  }
+
+  @Override
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    for(int i=0;i<facetFields.length;i++) {

+  public synchronized List<FacetResult> getFacetResults() throws IOException {
+    // Aggregate counts up to parents ... note that this
+    // only works for single-valued fields (well, it could
+    // also work for multi-valued fields if the CPs in the
+    // doc never "share" ancestors .. but DVs are
+    // still single-valued):
+    for(int ord=1;ord
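The patch text breaks off twice in this last file: the body of setNextReader (and all of collect()) is missing after the for-loop header, and getFacetResults() stops mid-loop. From the fields declared above and the test -- which adds one PackedLongDocValuesField per dimension holding the category's leaf ordinal -- the missing pieces plausibly look like the sketch below. This is a reconstruction under assumptions (the 4.x-era DocValues.getSource() / Source.getInt() accessors and TaxonomyReader.getParent()), not the author's code:

  // setNextReader: pull a random-access Source per configured facet field.
  for(int i=0;i<facetFields.length;i++) {
    DocValues dv = context.reader().docValues(facetFields[i]);
    readerValues[i] = dv == null ? null : dv.getSource();
  }

  // collect: each field holds exactly one leaf ordinal per document.
  for(int i=0;i<readerValues.length;i++) {
    if (readerValues[i] != null) {
      counts[(int) readerValues[i].getInt(doc)]++;
    }
  }

  // getFacetResults: roll each ordinal's collected count up to its ancestors.
  // Parents always have smaller ordinals than their children in the taxonomy,
  // so visiting ords in ascending order means counts[ord] still holds only
  // what collect() gathered, and each ordinal contributes to each ancestor
  // exactly once:
  for(int ord=1;ord<counts.length;ord++) {
    int count = counts[ord];
    if (count > 0) {
      int parent = taxoReader.getParent(ord);
      while (parent > 0) {  // stop before the taxonomy root (ordinal 0)
        counts[parent] += count;
        parent = taxoReader.getParent(parent);
      }
    }
  }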