Index: lucene/core/src/java/org/apache/lucene/document/Field.java =================================================================== --- lucene/core/src/java/org/apache/lucene/document/Field.java (revision 1421049) +++ lucene/core/src/java/org/apache/lucene/document/Field.java (working copy) @@ -343,7 +343,7 @@ * not to change it until you're done with this field. */ public void setBytesValue(BytesRef value) { - if (!(fieldsData instanceof BytesRef)) { + if (fieldsData != null && !(fieldsData instanceof BytesRef)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to BytesRef"); } if (type.indexed()) { Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java (revision 1421049) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java (working copy) @@ -21,8 +21,11 @@ import java.util.ArrayList; import java.util.List; +import org.apache.lucene.document.DocValuesFacetField; +import org.apache.lucene.document.PayloadFacetField; import org.apache.lucene.document.Document; import org.apache.lucene.facet.index.CategoryDocumentBuilder; +import org.apache.lucene.facet.search.DocValuesFacetsCollector; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; @@ -35,15 +38,16 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiCollector; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +// nocommit cutover more tests to XXXFacetField public class TestDemoFacets extends LuceneTestCase { 
private DirectoryTaxonomyWriter taxoWriter; private RandomIndexWriter writer; - private CategoryDocumentBuilder docBuilder; private void add(String ... categoryPaths) throws IOException { Document doc = new Document(); @@ -52,8 +56,8 @@ for(String categoryPath : categoryPaths) { paths.add(new CategoryPath(categoryPath, '/')); } - docBuilder.setCategoryPaths(paths); - docBuilder.build(doc); + doc.add(new PayloadFacetField(paths, taxoWriter)); + doc.add(new DocValuesFacetField(paths, taxoWriter)); writer.addDocument(doc); } @@ -66,10 +70,6 @@ // main index: taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); - // Reused across documents, to add the necessary facet - // fields: - docBuilder = new CategoryDocumentBuilder(taxoWriter); - add("Author/Bob", "Publish Date/2010/10/15"); add("Author/Lisa", "Publish Date/2010/10/20"); add("Author/Lisa", "Publish Date/2012/1/1"); @@ -93,32 +93,36 @@ // Aggregatses the facet counts: FacetsCollector c = new FacetsCollector(fsp, searcher.getIndexReader(), taxoReader); + DocValuesFacetsCollector c2 = new DocValuesFacetsCollector(fsp, taxoReader); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: - searcher.search(new MatchAllDocsQuery(), c); + searcher.search(new MatchAllDocsQuery(), MultiCollector.wrap(c, c2)); // Retrieve & verify results: - List results = c.getFacetResults(); + List results = c2.getFacetResults(); assertEquals(2, results.size()); assertEquals("Publish Date (5)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n", toSimpleString(results.get(0))); assertEquals("Author (5)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n", toSimpleString(results.get(1))); - // Now user drills down on Publish Date/2010: fsp = new FacetSearchParams(); Query q2 = DrillDown.query(fsp, new MatchAllDocsQuery(), new CategoryPath("Publish Date/2010", '/')); 
fsp.addFacetRequest(new CountFacetRequest(new CategoryPath("Author"), 10)); c = new FacetsCollector(fsp, searcher.getIndexReader(), taxoReader); - searcher.search(q2, c); + c2 = new DocValuesFacetsCollector(fsp, taxoReader); + searcher.search(q2, MultiCollector.wrap(c, c2)); results = c.getFacetResults(); assertEquals(1, results.size()); + // nocommit fails because DocValuesFacetField doesn't + // index properly for drill downs!!! assertEquals("Author (2)\n Lisa (1)\n Bob (1)\n", toSimpleString(results.get(0))); + assertEquals(results.toString(), c2.getFacetResults().toString()); taxoReader.close(); searcher.getIndexReader().close(); Index: lucene/facet/src/java/org/apache/lucene/document/PayloadFacetField.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/document/PayloadFacetField.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/document/PayloadFacetField.java (working copy) @@ -0,0 +1,91 @@ +package org.apache.lucene.document; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.facet.index.attributes.CategoryAttribute;
+import org.apache.lucene.facet.index.attributes.CategoryAttributeImpl;
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
+import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
+import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
+import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
+import org.apache.lucene.facet.index.streaming.CountingListTokenizer;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.index.Term;
+
+// nocommit what happens if you add more than one of these
+// fields to the index...? hmm multiple positions ... hmm
+// nocommit single-valued/multi-valued?
+
+/** Sugar for the common case; use {@link
+ *  org.apache.lucene.facet.index.CategoryDocumentBuilder} for expert cases.
*/
+public class PayloadFacetField extends Field {
+
+  // nocommit call freeze on this once LUCENE-4621 is in:
+  private static final FacetIndexingParams DEFAULT_FACET_INDEXING_PARAMS = new DefaultFacetIndexingParams();
+
+  public static final FieldType TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+
+  static {
+    TYPE.setOmitNorms(true);
+    TYPE.freeze();
+  }
+
+  public PayloadFacetField(Iterable<CategoryPath> paths, TaxonomyWriter taxoWriter) {
+    this(DEFAULT_FACET_INDEXING_PARAMS, paths, taxoWriter);
+  }
+
+  public PayloadFacetField(FacetIndexingParams indexingParams, Iterable<CategoryPath> categoryPaths, TaxonomyWriter taxoWriter) {
+    super(CategoryListParams.DEFAULT_TERM.field(), TYPE);
+
+    // nocommit can replace with this: assert that FIP.getAllCLPs().size() == 1
+
+    List<CategoryAttribute> atts = new ArrayList<CategoryAttribute>();
+    for(CategoryPath categoryPath : categoryPaths) {
+      Term term = indexingParams.getCategoryListParams(categoryPath).getTerm();
+      if (!term.field().equals(CategoryListParams.DEFAULT_TERM.field())) {
+        // nocommit improve message: what configuration of
+        // FIP would result in different field names
+        // ... partitioning (or is that just different terms
+        // w/in same field...).
+        throw new IllegalArgumentException("category " + categoryPath + " maps to field \"" + term.field() + "\", not \"" + CategoryListParams.DEFAULT_TERM.field() + "\"; use CategoryDocumentBuilder instead");
+      }
+      atts.add(new CategoryAttributeImpl(categoryPath));
+    }
+
+    // TODO: can this be simplified? Maybe just compute the
+    // byte[] and use CannedTokenStream?
+
+    CategoryAttributesStream categoryAttributesStream = new CategoryAttributesStream(atts);
+    // Set a suitable {@link TokenStream} using
+    // CategoryParentsStream, followed by CategoryListTokenizer and
+    // CategoryTokenizer composition (the ordering of the last two is
+    // not mandatory).
+    CategoryParentsStream parentsStream = new CategoryParentsStream(categoryAttributesStream,
+        taxoWriter, indexingParams);
+    CategoryListTokenizer categoryListTokenizer = new CountingListTokenizer(parentsStream, indexingParams);
+    CategoryTokenizer stream = new CategoryTokenizer(categoryListTokenizer, indexingParams);
+    setTokenStream(stream);
+  }
+}

Property changes on: lucene/facet/src/java/org/apache/lucene/document/PayloadFacetField.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/document/DocValuesFacetField.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/document/DocValuesFacetField.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/document/DocValuesFacetField.java (working copy)
@@ -0,0 +1,112 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.facet.index.attributes.CategoryAttribute;
+import org.apache.lucene.facet.index.attributes.CategoryAttributeImpl;
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
+import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
+import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
+import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
+import org.apache.lucene.facet.index.streaming.CountingListTokenizer;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.BytesRef;
+
+// nocommit what happens if you add more than one of these
+// fields to the index...? hmm multiple positions ... hmm
+// nocommit single-valued/multi-valued?
+
+/** Sugar for the common case (category list stored as one BYTES_VAR_STRAIGHT doc value);
+ *  use {@link org.apache.lucene.facet.index.CategoryDocumentBuilder} for expert cases. */
+public class DocValuesFacetField extends Field {
+
+  // nocommit is this safe? does it have mutable state?
+  private static final FacetIndexingParams DEFAULT_FACET_INDEXING_PARAMS = new DefaultFacetIndexingParams();
+
+  public static final FieldType TYPE = new FieldType();
+
+  static {
+    TYPE.setDocValueType(DocValues.Type.BYTES_VAR_STRAIGHT);
+    TYPE.freeze();
+  }
+
+  public DocValuesFacetField(Iterable<CategoryPath> paths, TaxonomyWriter taxoWriter) {
+    this(DEFAULT_FACET_INDEXING_PARAMS, paths, taxoWriter);
+  }
+
+  public DocValuesFacetField(FacetIndexingParams indexingParams, Iterable<CategoryPath> categoryPaths, TaxonomyWriter taxoWriter) {
+    super(CategoryListParams.DEFAULT_TERM.field(), TYPE);
+
+    // nocommit can replace with this: assert that FIP.getAllCLPs().size() == 1
+
+    List<CategoryAttribute> atts = new ArrayList<CategoryAttribute>();
+    for(CategoryPath categoryPath : categoryPaths) {
+      Term term = indexingParams.getCategoryListParams(categoryPath).getTerm();
+      if (!term.field().equals(CategoryListParams.DEFAULT_TERM.field())) {
+        // nocommit improve message: what configuration of
+        // FIP would result in different field names
+        // ... partitioning (or is that just different terms
+        // w/in same field...).
+        throw new IllegalArgumentException("category " + categoryPath + " maps to field \"" + term.field() + "\", not \"" + CategoryListParams.DEFAULT_TERM.field() + "\"; use CategoryDocumentBuilder instead");
+      }
+      atts.add(new CategoryAttributeImpl(categoryPath));
+    }
+
+    CategoryAttributesStream categoryAttributesStream = new CategoryAttributesStream(atts);
+    // Build the category token stream: CategoryParentsStream,
+    // followed by CategoryListTokenizer and CategoryTokenizer
+    // (the ordering of the last two is not mandatory); it is
+    // only consumed below to extract the encoded payload.
+    CategoryParentsStream parentsStream = new CategoryParentsStream(categoryAttributesStream,
+        taxoWriter, indexingParams);
+    CategoryListTokenizer categoryListTokenizer = new CountingListTokenizer(parentsStream, indexingParams);
+    CategoryTokenizer stream = new CategoryTokenizer(categoryListTokenizer, indexingParams);
+
+    // nocommit total hack!!!! need more direct "getBytes"
+    // somewhere/how:
+    try {
+      PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
+      stream.reset();
+
+      while(stream.incrementToken()) {
+        BytesRef payload = payloadAtt.getPayload();
+        if (payload != null) {
+          // defensive copy: the stream may own/reuse the payload's bytes
+          setBytesValue(BytesRef.deepCopyOf(payload));
+          break;
+        }
+      }
+    } catch (IOException ioe) {
+      throw new RuntimeException(ioe);
+    }
+
+    // nocommit must still add token stream so we get
+    // drilldown!
+  }
+}

Property changes on: lucene/facet/src/java/org/apache/lucene/document/DocValuesFacetField.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesFacetsCollector.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesFacetsCollector.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesFacetsCollector.java (working copy)
@@ -0,0 +1,123 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.PackedInts;
+
+// nocommit rename?
+// nocommit document limitations & detect when they are
+// violated (eg, only one partition, must be vInt(dgap))
+
+public class DocValuesFacetsCollector extends Collector {
+  private final FacetSearchParams fsp;
+  private final TaxonomyReader taxoReader;
+  private final FacetArrays facetArrays;
+  // TODO: maybe sometimes int[] hash...? maybe start as
+  // int[] hash but then grow into full array?
+  // TODO: could be packed ints too, if we can accept a max
+  // facet count:
+  private final int[] counts;
+  private DocValues.Source currentReaderFacets;
+
+  public DocValuesFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
+    this.fsp = fsp;
+    this.taxoReader = taxoReader;
+    // must pull array from here (instead of just a new
+    // int[]) in order to use later API to create
+    // FacetResults:
+    facetArrays = new FacetArrays(taxoReader.getSize());
+    counts = facetArrays.getIntArray();
+  }
+
+  @Override
+  public void setNextReader(AtomicReaderContext context) throws IOException {
+    currentReaderFacets = context.reader().docValues(CategoryListParams.DEFAULT_TERM.field()).getDirectSource();
+  }
+
+  private final BytesRef scratch = new BytesRef();
+
+  @Override
+  public void collect(int doc) throws IOException {
+    currentReaderFacets.getBytes(doc, scratch);
+    int upto = scratch.offset;
+    final int end = scratch.offset + scratch.length;
+    int ord = 0;
+    int accum = 0;
+    final byte[] bytes = scratch.bytes;
+    while(upto < end) {
+      byte b = bytes[upto++];
+      accum = (accum << 7) | (b & 0x7F);
+      if (b >= 0) {
+        ord += accum;
+        counts[ord]++;
+        accum = 0;
+      }
+    }
+    assert accum == 0;
+  }
+
+  public synchronized List<FacetResult> getFacetResults() throws IOException {
+    List<FacetResult> res = new ArrayList<FacetResult>();
+    for(FacetRequest fr : fsp.getFacetRequests()) {
+      FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxoReader);
+      IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(facetArrays, 0);
+      FacetResult facetRes = frHndlr.renderFacetResult(res4fr);
+      frHndlr.labelResult(facetRes);
+      res.add(facetRes);
+    }
+
+    return res;
+  }
+
+  public int[] getRawCounts() {
+    return counts;
+  }
+
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    return true;
+  }
+
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+  }
+}

Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesFacetsCollector.java
___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property