Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 1292224)
+++ CHANGES.txt (working copy)
@@ -165,7 +165,11 @@
 
 * LUCENE-3714: Add top N shortest cost paths search for FST.
   (Robert Muir, Dawid Weiss, Mike McCandless)
-
+
+* LUCENE-3688: Added option for creating a facet request which looks at
+  a single category, collects that category's associations in the result
+  set and returns a facet result with buckets of association values.
+
 Bug fixes
 
 * LUCENE-3595: Fixed FieldCacheRangeFilter and FieldCacheTermsFilter
Index: contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsIndexer.java
===================================================================
--- contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsIndexer.java (revision 0)
+++ contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsIndexer.java (revision 0)
@@ -0,0 +1,206 @@
+package org.apache.lucene.facet.example.association.buckets;
+
+import java.util.HashSet;
+import java.util.Random;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.facet.enhancements.EnhancementsDocumentBuilder;
+import org.apache.lucene.facet.enhancements.association.AssociationFloatProperty;
+import org.apache.lucene.facet.enhancements.association.AssociationIntProperty;
+import org.apache.lucene.facet.enhancements.association.AssociationProperty;
+import org.apache.lucene.facet.example.ExampleUtils;
+import org.apache.lucene.facet.example.association.AssociationUtils;
+import org.apache.lucene.facet.example.simple.SimpleUtils;
+import org.apache.lucene.facet.index.CategoryContainer;
+import org.apache.lucene.facet.index.CategoryDocumentBuilder;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Sample indexer creates an index, and adds to it sample documents with
+ * categories, which can be simple or contain associations.
+ */
+public class AssociationBucketsIndexer {
+
+  /**
+   * Creates an index and adds sample documents and categories to it.
+   *
+   * @param indexDir
+   *          Directory in which the index should be created.
+   * @param taxoDir
+   *          Directory in which the taxonomy index should be created.
+   * @param useNumericFields
+   *          Whether a numeric field holding the association is also added per category.
+   * @throws Exception
+   *           on error (no detailed exception handling here for sample
+   *           simplicity)
+   */
+  public static void index(Directory indexDir, Directory taxoDir,
+      boolean useNumericFields) throws Exception {
+
+    // create and open an index writer; OpenMode.CREATE starts from an empty
+    // index on every call, in step with the taxonomy writer created below
+    IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(
+        ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer).setOpenMode(OpenMode.CREATE));
+
+    // create and open a taxonomy writer
+    TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
+
+    // fixed seed: every run generates the same documents
+    Random random = new Random(49);
+
+    // loop over sample documents
+    int nDocs = 100;
+    int nFacetsAdded = 0;
+    for (int docNum = 0; docNum < nDocs; docNum++) {
+      ExampleUtils.log(" ++++ DOC ID: " + docNum);
+
+      Document doc = new Document();
+
+      // obtain categories with associations for current document
+      CategoryContainer categoryContainer = new CategoryContainer();
+      int nCategories = random.nextInt(2) + 3;
+      CategoryPath[] categoryPaths = new CategoryPath[nCategories];
+      AssociationProperty[] associationProps = new AssociationProperty[nCategories];
+      generateCategoriesAndAssociations(categoryPaths, associationProps,
+          random);
+      for (int i = 0; i < categoryPaths.length; i++) {
+        categoryContainer.addCategory(categoryPaths[i],
+            associationProps[i]);
+        ExampleUtils.log("\t $$$$ Association: (" + categoryPaths[i]
+            + "," + associationProps[i] + ")");
+        if (useNumericFields) {
+          NumericField numericField = getNumericField(
+              categoryPaths[i], associationProps[i]);
+          doc.add(numericField);
+        }
+      }
+
+      // we do not alter indexing parameters!
+      // a category document builder will add the categories to a document
+      // once build() is called
+      CategoryDocumentBuilder categoryDocBuilder = new EnhancementsDocumentBuilder(
+          taxo, AssociationUtils.assocIndexingParams);
+      categoryDocBuilder.setCategories(categoryContainer);
+
+      // create a plain Lucene document and add some regular Lucene fields
+      // to it
+      StringBuilder builder = new StringBuilder();
+      addTerm(builder, random);
+      addTerm(builder, random);
+      doc.add(new Field(SimpleUtils.TITLE, builder.toString(), Store.YES,
+          Index.ANALYZED));
+      int additionalText = random.nextInt(10);
+      for (int i = 0; i < additionalText; i++) {
+        addTerm(builder, random);
+      }
+      doc.add(new Field(SimpleUtils.TEXT, builder.toString(), Store.NO,
+          Index.ANALYZED));
+
+      // invoke the category document builder for adding categories to the
+      // document and,
+      // as required, to the taxonomy index
+      categoryDocBuilder.build(doc);
+
+      // finally add the document to the index
+      iw.addDocument(doc);
+
+      nFacetsAdded += categoryContainer.size();
+    }
+
+    // commit changes.
+    // we commit changes to the taxonomy index prior to committing them to
+    // the search index.
+    // this is important, so that all facets referred to by documents in the
+    // search index
+    // will indeed exist in the taxonomy index.
+    taxo.commit();
+    iw.commit();
+
+    // close the taxonomy index and the index - all modifications are
+    // now safely in the provided directories: indexDir and taxoDir.
+    taxo.close();
+    iw.close();
+
+    ExampleUtils.log("Indexed " + nDocs + " documents with overall "
+        + nFacetsAdded + " facets.");
+  }
+
+  /** Appends a randomly chosen sample term, followed by a space, to the builder. */
+  private static void addTerm(StringBuilder builder, Random random) {
+    int index = random.nextInt(AssociationBucketsUtils.terms.length);
+    builder.append(AssociationBucketsUtils.terms[index]);
+    builder.append(' ');
+  }
+
+  /**
+   * Builds a numeric field named after the category and holding its association.
+   *
+   * @param categoryPath
+   *          category whose string form is used as the field name.
+   * @param associationProp
+   *          int or float association to store; for any other property type
+   *          no value is set on the field.
+   * @return the numeric field.
+   */
+  private static NumericField getNumericField(CategoryPath categoryPath,
+      AssociationProperty associationProp) {
+    NumericField numericField = new NumericField(categoryPath.toString());
+    if (associationProp instanceof AssociationIntProperty) {
+      numericField.setIntValue(((AssociationIntProperty) associationProp)
+          .getAssociation());
+    } else if (associationProp instanceof AssociationFloatProperty) {
+      numericField
+          .setFloatValue(((AssociationFloatProperty) associationProp)
+              .getFloatAssociation());
+    }
+    return numericField;
+  }
+
+  /**
+   * Fills both arrays with distinct single-component categories: "i" + number
+   * with a random int association, or "f" + number with a random float one.
+   */
+  private static void generateCategoriesAndAssociations(
+      CategoryPath[] categoryPaths,
+      AssociationProperty[] associationProps, Random random) {
+    HashSet<String> assigned = new HashSet<String>();
+
+    int intRange = categoryPaths.length * 2;
+    for (int i = 0; i < categoryPaths.length; i++) {
+      if (random.nextBoolean()) {
+        // create int
+        int next = random.nextInt(intRange);
+        String str = "i" + next;
+        if (assigned.contains(str)) {
+          i--; // name already used in this document: retry the same slot
+        } else {
+          assigned.add(str);
+          categoryPaths[i] = new CategoryPath(str);
+          associationProps[i] = new AssociationIntProperty(
+              random.nextInt(100));
+        }
+      } else {
+        // create float
+        int next = random.nextInt(intRange);
+        String str = "f" + next;
+        if (assigned.contains(str)) {
+          i--; // name already used in this document: retry the same slot
+        } else {
+          assigned.add(str);
+          categoryPaths[i] = new CategoryPath(str);
+          float f = ((float) random.nextInt(10000)) / 10000;
+          associationProps[i] = new AssociationFloatProperty(f);
+        }
+      }
+    }
+  }
+}
Index:
contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsMain.java
===================================================================
--- contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsMain.java (revision 0)
+++ contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsMain.java (revision 0)
@@ -0,0 +1,80 @@
+package org.apache.lucene.facet.example.association.buckets;
+
+import java.util.List;
+
+import org.apache.lucene.facet.example.ExampleResult;
+import org.apache.lucene.facet.example.ExampleUtils;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/** Driver for the association buckets sample. */
+public class AssociationBucketsMain {
+
+  /**
+   * Runs the dynamic and the defined buckets samples, each without and with numeric fields.
+   *
+   * @throws Exception
+   *           on error (no detailed exception handling here for sample
+   *           simplicity)
+   */
+  public static void main(String[] args) throws Exception {
+    // create Directories for the search index and for the taxonomy index
+    Directory indexDir = new RAMDirectory();
+    Directory taxoDir = new RAMDirectory();
+
+    new AssociationBucketsMain().runDynamicSample(indexDir, taxoDir, false);
+    new AssociationBucketsMain().runDynamicSample(indexDir, taxoDir, true);
+
+    new AssociationBucketsMain().runDefinedSample(indexDir, taxoDir,
+        false);
+    new AssociationBucketsMain().runDefinedSample(indexDir, taxoDir,
+        true);
+
+    indexDir.close();
+    taxoDir.close();
+
+    ExampleUtils.log("DONE");
+  }
+
+  /**
+   * Indexes the sample documents, then searches them with requests that ask
+   * for a number of dynamically created buckets.
+   */
+  public ExampleResult runDynamicSample(Directory indexDir,
+      Directory taxoDir, boolean useNumericFields) throws Exception {
+
+    // index the sample documents
+    ExampleUtils.log("index the sample documents...");
+    AssociationBucketsIndexer.index(indexDir, taxoDir, useNumericFields);
+
+    ExampleUtils.log("search the sample documents...");
+    List<FacetResult> facetRes = AssociationBucketsSearcher.searchDynamic(
+        indexDir, taxoDir);
+
+    ExampleResult res = new ExampleResult();
+    res.setFacetResults(facetRes);
+    return res;
+  }
+
+  /**
+   * Indexes the sample documents, then searches them with requests that use
+   * the pre-defined buckets of {@link AssociationBucketsUtils}.
+   */
+  public ExampleResult runDefinedSample(Directory indexDir,
+      Directory taxoDir, boolean useNumericFields) throws Exception {
+
+    // index the sample documents
+    ExampleUtils.log("index the sample documents...");
+    AssociationBucketsIndexer.index(indexDir, taxoDir, useNumericFields);
+
+    ExampleUtils.log("search the sample documents...");
+    List<FacetResult> facetRes = AssociationBucketsSearcher
+        .searchDefined(indexDir, taxoDir);
+
+    ExampleResult res = new ExampleResult();
+    res.setFacetResults(facetRes);
+    return res;
+  }
+
+}
Index: contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsSearcher.java
===================================================================
--- contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsSearcher.java (revision 0)
+++ contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsSearcher.java (revision 0)
@@ -0,0 +1,71 @@
+package org.apache.lucene.facet.example.association.buckets;
+
+import java.util.List;
+
+import org.apache.lucene.facet.enhancements.association.buckets.AssociationFloatBucketsFacetRequest;
+import org.apache.lucene.facet.enhancements.association.buckets.AssociationIntBucketsFacetRequest;
+import org.apache.lucene.facet.example.association.AssociationUtils;
+import org.apache.lucene.facet.example.simple.SimpleSearcher;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.store.Directory;
+
+/**
+ * AssociationSearcher searches index with facets, evaluating the facets with
+ *
their associated $int value
+ */
+public class AssociationBucketsSearcher {
+
+  public static List<FacetResult> searchDynamic(Directory indexDir,
+      Directory taxoDir) throws Exception {
+    // prepare index reader and taxonomy.
+    TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
+    IndexReader indexReader = IndexReader.open(indexDir);
+
+    // create facet requests: dynamic mode, at most 3 buckets per category
+    AssociationIntBucketsFacetRequest intRangeFacetRequest = new AssociationIntBucketsFacetRequest(
+        new CategoryPath("i2"), 3, AssociationUtils.assocIndexingParams);
+    AssociationFloatBucketsFacetRequest floatRangeFacetRequest = new AssociationFloatBucketsFacetRequest(
+        new CategoryPath("f1"), 3, AssociationUtils.assocIndexingParams);
+
+    List<FacetResult> res = SimpleSearcher.searchWithRequest(indexReader, taxo,
+        AssociationUtils.assocIndexingParams, intRangeFacetRequest,
+        floatRangeFacetRequest);
+
+    // close readers (not reached if the search above throws)
+    taxo.close();
+    indexReader.close();
+
+    return res;
+  }
+
+  public static List<FacetResult> searchDefined(Directory indexDir,
+      Directory taxoDir) throws Exception {
+    // prepare index reader and taxonomy.
+    TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
+    IndexReader indexReader = IndexReader.open(indexDir);
+
+    // create facet requests: defined mode, buckets come from AssociationBucketsUtils
+    AssociationIntBucketsFacetRequest intRangeFacetRequest = new AssociationIntBucketsFacetRequest(
+        new CategoryPath("i2"), AssociationBucketsUtils.intRanges,
+        AssociationUtils.assocIndexingParams);
+
+    AssociationFloatBucketsFacetRequest floatRangeFacetRequest = new AssociationFloatBucketsFacetRequest(
+        new CategoryPath("f1"), AssociationBucketsUtils.floatRanges,
+        AssociationUtils.assocIndexingParams);
+
+    List<FacetResult> res = SimpleSearcher.searchWithRequest(indexReader, taxo,
+        AssociationUtils.assocIndexingParams, intRangeFacetRequest,
+        floatRangeFacetRequest);
+
+    // close readers (not reached if the search above throws)
+    taxo.close();
+    indexReader.close();
+
+    return res;
+  }
+
+}
Index: contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsUtils.java
===================================================================
--- contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsUtils.java (revision 0)
+++ contrib/facet/src/examples/org/apache/lucene/facet/example/association/buckets/AssociationBucketsUtils.java (revision 0)
@@ -0,0 +1,29 @@
+package org.apache.lucene.facet.example.association.buckets;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.facet.enhancements.association.buckets.AssociationsFloatBucket;
+import org.apache.lucene.facet.enhancements.association.buckets.AssociationsIntBucket;
+import org.apache.lucene.facet.enhancements.association.buckets.AssociationsBucket;
+
+public class AssociationBucketsUtils {
+
+  public static String[] terms = new String[] {"white", "black", "green",
+      "blue", "car", "ship", "airplane", "is", "a", "moving"};
+
+  public static List<AssociationsBucket> intRanges = new ArrayList<AssociationsBucket>();
+  static {
+    intRanges.add(new AssociationsIntBucket(0, 10));
+    intRanges.add(new AssociationsIntBucket(11, 20));
+    intRanges.add(new
AssociationsIntBucket(21, 30));
+    intRanges.add(new AssociationsIntBucket(31, 40));
+  }
+
+  public static List<AssociationsBucket> floatRanges = new ArrayList<AssociationsBucket>();
+  static {
+    floatRanges.add(new AssociationsFloatBucket(0f, 0.5f));
+    floatRanges.add(new AssociationsFloatBucket(0.5f, 10f));
+  }
+
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsAggregator.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsAggregator.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsAggregator.java (revision 0)
@@ -0,0 +1,56 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import java.io.IOException;
+
+import org.apache.lucene.facet.enhancements.EnhancementsPayloadIterator;
+import org.apache.lucene.facet.enhancements.association.AssociationEnhancement;
+import org.apache.lucene.facet.search.aggregator.Aggregator;
+
+/**
+ * Aggregate association values for a certain category, and put them in buckets.
+ */
+public abstract class AssociationBucketsAggregator implements Aggregator {
+
+  protected final AssociationEnhancement associationEnhancement;
+  protected EnhancementsPayloadIterator payloadIterator;
+  protected long totalCount;
+
+  /**
+   * Constructor.
+   *
+   * @param associationEnhancement
+   *          AssociationEnhancement to identify association values.
+   * @param payloadIterator
+   *          An iterator over the payloads of the requested category's term.
+   */
+  public AssociationBucketsAggregator(
+      AssociationEnhancement associationEnhancement,
+      EnhancementsPayloadIterator payloadIterator) {
+    this.associationEnhancement = associationEnhancement;
+    this.payloadIterator = payloadIterator;
+    totalCount = 0;
+  }
+
+  /**
+   * Moves the payload iterator to the given document and, when it accepts the
+   * document, calls {@link #aggregate(int)}. The score is not used.
+   */
+  public void setNextDoc(int docid, float score) throws IOException {
+    if (payloadIterator.setdoc(docid)) {
+      aggregate(-1);
+    }
+  }
+
+  /**
+   * Aggregate the current document. {@link #setNextDoc(int, float)} always
+   * passes -1 as the ordinal (presumably a placeholder -- confirm).
+   */
+  public abstract void aggregate(int ordinal);
+
+  /**
+   * Get the total count of associations encountered.
+   */
+  public long getTotalCount() {
+    return totalCount;
+  }
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsDrillDown.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsDrillDown.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsDrillDown.java (revision 0)
@@ -0,0 +1,39 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams;
+import org.apache.lucene.facet.search.DrillDown;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * Creation of drill down query for categories with association values in a
+ * given bucket's range, for instance results of
+ * {@link AssociationBucketsFacetRequest}.
+ */
+public class AssociationBucketsDrillDown {
+
+  /**
+   * Return a query for drilling down into a given category with a specific
+   * associations bucket range.
+   */
+  public static final Query query(EnhancementsIndexingParams iParams,
+      CategoryPath path, AssociationsBucket associationBucket) {
+    Term term = DrillDown.term(iParams, path);
+    return associationBucket.getRangeQuery(term.field()); // NOTE(review): only the term's field is passed on, its text is unused -- confirm
+  }
+
+  /**
+   * Turn a base query into a drilling-down query: both the base query and the
+   * bucket range query for the given category are required (Occur.MUST).
+   */
+  public static final Query query(EnhancementsIndexingParams iParams,
+      Query baseQuery, CategoryPath path, AssociationsBucket associationBucket) {
+    BooleanQuery res = new BooleanQuery();
+    res.add(baseQuery, Occur.MUST);
+    res.add(query(iParams, path, associationBucket), Occur.MUST);
+    return res;
+  }
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetRequest.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetRequest.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetRequest.java (revision 0)
@@ -0,0 +1,181 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.facet.enhancements.CategoryEnhancement;
+import org.apache.lucene.facet.enhancements.EnhancementsPayloadIterator;
+import org.apache.lucene.facet.enhancements.association.AssociationEnhancement;
+import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams;
+import org.apache.lucene.facet.search.CategoryListIterator;
+import org.apache.lucene.facet.search.DrillDown;
+import org.apache.lucene.facet.search.FacetArrays;
+import org.apache.lucene.facet.search.aggregator.Aggregator;
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import
org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.encoding.IntDecoder;
+
+/**
+ * A {@link FacetRequest} which looks at a single category, collects that
+ * category's associations, and returns a result with buckets of association
+ * values.
+ * <p>
+ * The definition of buckets can be done in two modes:
+ * <ol>
+ * <li>Defined buckets: buckets are defined outside the facet request, and given
+ * through the constructor
+ * {@link #AssociationBucketsFacetRequest(CategoryPath, Collection, EnhancementsIndexingParams)}.
+ * During the collection process each association encountered is counted in
+ * the proper pre-defined bucket.</li>
+ * <li>Dynamic buckets: in this case the facet request is defined by the number
+ * of required buckets, given as the "numBuckets" parameter in
+ * {@link #AssociationBucketsFacetRequest(CategoryPath, int, EnhancementsIndexingParams)}.
+ * The association results are collected, and at the end of the collection
+ * process the buckets are dynamically calculated and filled.
+ * NOTE: the collection process in this mode may require large memory
+ * resources.</li>
+ * </ol>
+ */
+public abstract class AssociationBucketsFacetRequest extends FacetRequest {
+
+  private final EnhancementsIndexingParams enhancementsIndexingParams;
+  protected AssociationBucketsAggregator aggregator; // assigned by createAggregator
+  protected Collection buckets; // NOTE(review): type argument looks lost in extraction (likely AssociationsBucket) -- confirm
+
+  /**
+   * Buckets created dynamically according to the number of requested buckets.
+   *
+   * @param path
+   *          The category to find buckets for.
+   * @param numBuckets
+   *          The maximal number of required buckets.
+   * @param enhancementsIndexingParams
+   *          Indexing params to use.
+   */
+  public AssociationBucketsFacetRequest(CategoryPath path, int numBuckets,
+      EnhancementsIndexingParams enhancementsIndexingParams) {
+    super(path, numBuckets);
+    super.setDepth(0);
+    super.setResultMode(ResultMode.GLOBAL_FLAT);
+    this.enhancementsIndexingParams = enhancementsIndexingParams;
+  }
+
+  /**
+   * Using pre-defined buckets.
+   *
+   * @param path
+   *          The category to find buckets for.
+   * @param buckets
+   *          The pre-defined buckets to use.
+   * @param enhancementsIndexingParams
+   *          Indexing params to use.
+   */
+  public AssociationBucketsFacetRequest(CategoryPath path,
+      Collection buckets,
+      EnhancementsIndexingParams enhancementsIndexingParams) {
+    super(path, buckets.size());
+    super.setDepth(0);
+    super.setResultMode(ResultMode.GLOBAL_FLAT);
+    this.buckets = buckets;
+    this.enhancementsIndexingParams = enhancementsIndexingParams;
+  }
+
+  @Override
+  public void setDepth(int depth) {
+    if (depth != 0) {
+      throw new IllegalArgumentException("Cannot set a depth other than 0 for "
+          + getClass().getSimpleName());
+    }
+  }
+
+  @Override
+  public void setResultMode(ResultMode resultMode) {
+    if (resultMode != ResultMode.GLOBAL_FLAT) {
+      throw new IllegalArgumentException("Only " + ResultMode.GLOBAL_FLAT
+          + " mode legal");
+    }
+  }
+
+  @Override
+  public Aggregator createAggregator(boolean useComplements,
+      FacetArrays arrays, IndexReader indexReader, TaxonomyReader taxonomy)
+      throws IOException {
+    Term term = DrillDown.term(enhancementsIndexingParams, getCategoryPath());
+    EnhancementsPayloadIterator payloadIterator = new EnhancementsPayloadIterator(
+        enhancementsIndexingParams.getCategoryEnhancements(), indexReader, term);
+    if (!payloadIterator.init()) {
+      throw new IOException("No association values for term " + term);
+    }
+    AssociationEnhancement associationEnhancement = null; // first association enhancement found, if any
+    for (CategoryEnhancement enhancement : enhancementsIndexingParams
+        .getCategoryEnhancements()) {
+      if (enhancement instanceof AssociationEnhancement) {
+        associationEnhancement = (AssociationEnhancement) enhancement;
+        break;
+      }
+    }
+    if (associationEnhancement == null) {
+      throw new IOException(
+          "Cannot apply buckets facet request without associations");
+    }
+    if (buckets == null) { // no pre-defined buckets were given: dynamic mode
+      aggregator = getDynamicAssociationBucketsAggregator(
+          associationEnhancement, payloadIterator, getNumLabel());
+    } else {
+      aggregator = getDefinedAssociationBucketsAggregator(
+          associationEnhancement, payloadIterator);
+    }
+    return aggregator;
+  }
+
+  /**
+   * Get a dynamic aggregator, used when no pre-defined buckets were given.
+   */
+  protected abstract AssociationBucketsAggregator getDynamicAssociationBucketsAggregator(
+      AssociationEnhancement associationEnhancement,
+      EnhancementsPayloadIterator payloadIterator, int numBuckets);
+
+  /**
+   * Get an aggregator using pre-defined buckets.
+   */
+  protected abstract AssociationBucketsAggregator getDefinedAssociationBucketsAggregator(
+      AssociationEnhancement associationEnhancement,
+      EnhancementsPayloadIterator payloadIterator);
+
+  @Override
+  public CategoryListIterator createCategoryListIterator(IndexReader reader,
+      TaxonomyReader taxo, FacetSearchParams sParams, int partition)
+      throws IOException {
+    // we need to create a new iterator every time as iterator equality is
+    // used in StandardFacetsAccumulator.getCategoryListMap
+    return new CategoryListIterator() {
+
+      public boolean init() throws IOException {
+        return true;
+      }
+
+      public boolean skipTo(int docId) throws IOException {
+        return true;
+      }
+
+      public long nextCategory() throws IOException {
+        return IntDecoder.EOS; // never yields a category
+      }
+    };
+  }
+
+  @Override
+  public double getValueOf(FacetArrays arrays, int idx) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean requireDocumentScore() {
+    return false;
+  }
+
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResult.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResult.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResult.java (revision 0)
@@ -0,0 +1,19 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import
org.apache.lucene.facet.search.results.IntermediateFacetResult;
+
+/**
+ * A {@link FacetResult} of association buckets; it is also its own {@link IntermediateFacetResult}.
+ */
+class AssociationBucketsFacetResult extends FacetResult implements
+    IntermediateFacetResult {
+
+  AssociationBucketsFacetResult(FacetRequest facetRequest,
+      FacetResultNode rootNode, int numValidDescendants) {
+    super(facetRequest, rootNode, numValidDescendants); // nothing beyond the base FacetResult state is kept
+  }
+
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResultNode.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResultNode.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResultNode.java (revision 0)
@@ -0,0 +1,28 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import org.apache.lucene.facet.search.results.MutableFacetResultNode;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+
+/**
+ * Facet result node containing an association bucket of a certain category.
+ */
+public class AssociationBucketsFacetResultNode extends MutableFacetResultNode {
+
+  private AssociationsBucket bucket; // the bucket this node reports on
+
+  public AssociationBucketsFacetResultNode(int ordinal, double value,
+      CategoryPath label, AssociationsBucket bucket) {
+    super(ordinal, value, 0, label, null); // residue 0 and no sub-results
+    this.bucket = bucket;
+  }
+
+  public AssociationsBucket getBucket() {
+    return bucket;
+  }
+
+  @Override
+  public String toString(String prefix) {
+    return prefix + "[bucket: " + bucket.toString() + "]"; // only the bucket is rendered, not the node's value
+  }
+
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResultsHandler.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResultsHandler.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationBucketsFacetResultsHandler.java (revision 0)
@@ -0,0 +1,139 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.facet.search.FacetArrays;
+import org.apache.lucene.facet.search.FacetResultsHandler;
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.search.results.MutableFacetResultNode;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.collections.ObjectToIntMap;
+
+/**
+ * Base implementation of {@link FacetResultsHandler} for handling
+ * {@link AssociationBucketsFacetRequest}s.
+ */
+public abstract class AssociationBucketsFacetResultsHandler extends
+    FacetResultsHandler {
+
+  private final AssociationBucketsAggregator aggregator;
+
+  public AssociationBucketsFacetResultsHandler(TaxonomyReader taxonomyReader,
+      FacetRequest facetRequest, AssociationBucketsAggregator aggregator) {
+    super(taxonomyReader, facetRequest);
+    this.aggregator = aggregator;
+  }
+
+  @Override
+  public IntermediateFacetResult fetchPartitionResult(FacetArrays arrays,
+      int offset) throws IOException {
+    CategoryPath categoryPath = facetRequest.getCategoryPath();
+    int ordinal = taxonomyReader.getOrdinal(categoryPath);
+    IntermediateFacetResult bucketsFacetResults;
+    if (aggregator instanceof AssociationsBucketsDynamicAggregator) {
+      bucketsFacetResults = getDynamicFacetResults(categoryPath, ordinal,
+          (AssociationsBucketsDynamicAggregator) aggregator);
+    } else if (aggregator instanceof AssociationsBucketsDefinedAggregator) {
+      bucketsFacetResults = getDefinedFacetResults(categoryPath, ordinal,
+          (AssociationsBucketsDefinedAggregator) aggregator);
+    } else {
+      throw new UnsupportedOperationException();
+    }
+    return bucketsFacetResults;
+  }
+
+  private IntermediateFacetResult getDefinedFacetResults(
+      CategoryPath categoryPath, int ordinal,
+      AssociationsBucketsDefinedAggregator definedAggregator) {
+    ObjectToIntMap<AssociationsBucket> bucketCounts = definedAggregator
+        .getBucketCounts();
+
+    if (bucketCounts.size() == 0) {
+      // return empty result
+      MutableFacetResultNode rootNode = new MutableFacetResultNode(ordinal, 0);
+      rootNode.setResidue(definedAggregator.getResidue());
+      return new AssociationBucketsFacetResult(facetRequest, rootNode, 0);
+    }
+
+    // add sub nodes, one per bucket with a positive count
+    List<FacetResultNode> subnodes = new ArrayList<FacetResultNode>();
+    Iterator<AssociationsBucket> keyIterator = bucketCounts.keyIterator();
+    while (keyIterator.hasNext()) {
+      AssociationsBucket bucket = keyIterator.next();
+      int count = bucketCounts.get(bucket);
+      if (count > 0) {
+        subnodes.add(new AssociationBucketsFacetResultNode(ordinal, count, categoryPath,
+            bucket));
+      }
+    }
+    FacetResultNode rootNode = new MutableFacetResultNode(ordinal, 0,
+        definedAggregator.getResidue(), categoryPath, subnodes);
+    return new AssociationBucketsFacetResult(facetRequest, rootNode, subnodes.size());
+  }
+
+  private IntermediateFacetResult getDynamicFacetResults(
+      CategoryPath categoryPath, int ordinal,
+      AssociationsBucketsDynamicAggregator dynamicAggregator) {
+    // the aggregator exposes the bucket bounds as doubles; subclasses turn each
+    // pair back into a typed bucket through getAssociationsBucket
+    double[] lowerBounds = dynamicAggregator.getLowerBounds();
+    double[] upperBounds = dynamicAggregator.getUpperBounds();
+    int[] bucketCounts = dynamicAggregator.getBucketCounts();
+
+    if (lowerBounds.length == 0) {
+      // return empty result
+      FacetResultNode rootNode = new MutableFacetResultNode(ordinal, 0);
+      return new AssociationBucketsFacetResult(facetRequest, rootNode, 0);
+    }
+
+    List<FacetResultNode> subnodes = new ArrayList<FacetResultNode>();
+    for (int i = 0; i < bucketCounts.length; i++) {
+      AssociationsBucket bucket = getAssociationsBucket(lowerBounds[i],
+          upperBounds[i]);
+      subnodes.add(new AssociationBucketsFacetResultNode(ordinal, bucketCounts[i],
+          categoryPath, bucket));
+    }
+    FacetResultNode rootNode = new MutableFacetResultNode(ordinal, 0, 0,
+        categoryPath, subnodes);
+    return new AssociationBucketsFacetResult(facetRequest, rootNode, subnodes.size());
+  }
+
+  /**
+   * Get an {@link AssociationsBucket} given lower and upper bounds.
+   */
+  protected abstract AssociationsBucket getAssociationsBucket(
+      double lowerBound, double upperBound);
+
+  @Override
+  public IntermediateFacetResult mergeResults(
+      IntermediateFacetResult... tmpResults) throws IOException,
+      ClassCastException, IllegalArgumentException {
+    // no merge required - all done in first pass (fetchPartitionResult)
+    return tmpResults[0];
+  }
+
+  @Override
+  public FacetResult renderFacetResult(IntermediateFacetResult tmpResult)
+      throws IOException {
+    return (AssociationBucketsFacetResult) tmpResult;
+  }
+
+  @Override
+  public FacetResult rearrangeFacetResult(FacetResult facetResult) {
+    // no rearranging
+    return facetResult;
+  }
+
+  @Override
+  public void labelResult(FacetResult facetResult) throws IOException {
+    // no labeling required, already done
+  }
+
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationFloatBucketsFacetRequest.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationFloatBucketsFacetRequest.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationFloatBucketsFacetRequest.java (revision 0)
@@ -0,0 +1,122 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import java.util.Collection;
+
+import org.apache.lucene.facet.enhancements.EnhancementsPayloadIterator;
+import org.apache.lucene.facet.enhancements.association.AssociationEnhancement;
+import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams;
+import org.apache.lucene.facet.search.FacetResultsHandler;
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * An {@link AssociationBucketsFacetRequest} for a category with float
+ * associations.
+ */
+public class AssociationFloatBucketsFacetRequest extends
+    AssociationBucketsFacetRequest {
+
+  /**
+   * Buckets created dynamically according to the number of requested buckets.
+ * + * @param path + * The category to find buckets for. + * @param numBuckets + * The maximal number of required buckets. + * @param enhancementsIndexingParams + * Indexing params to use. + */ + public AssociationFloatBucketsFacetRequest(CategoryPath path, int numBuckets, + EnhancementsIndexingParams enhancementsIndexingParams) { + super(path, numBuckets, enhancementsIndexingParams); + } + + /** + * Using pre-defined buckets. + * + * @param path + * The category to find buckets for. + * @param buckets + * The pre-defined buckets to use. + * @param enhancementsIndexingParams + * Indexing params to use. + */ + public AssociationFloatBucketsFacetRequest(CategoryPath path, + Collection buckets, + EnhancementsIndexingParams enhancementsIndexingParams) { + super(path, buckets, enhancementsIndexingParams); + } + + @Override + public FacetResultsHandler createFacetResultsHandler( + TaxonomyReader taxonomyReader) { + return new AssociationFloatBucketsFacetResultsHandler(taxonomyReader, this, + aggregator); + } + + private class AssociationFloatBucketsFacetResultsHandler extends + AssociationBucketsFacetResultsHandler { + + public AssociationFloatBucketsFacetResultsHandler( + TaxonomyReader taxonomyReader, FacetRequest facetRequest, + AssociationBucketsAggregator aggregator) { + super(taxonomyReader, facetRequest, aggregator); + } + + @Override + protected AssociationsBucket getAssociationsBucket(double lowerBound, + double upperBound) { + return new AssociationsFloatBucket((float) lowerBound, (float) upperBound); + } + + } + + @Override + protected AssociationBucketsAggregator getDynamicAssociationBucketsAggregator( + AssociationEnhancement associationEnhancement, + EnhancementsPayloadIterator payloadIterator, int numBuckets) { + return new DynamicFloatAggregator(associationEnhancement, payloadIterator, + numBuckets); + } + + private class DynamicFloatAggregator extends AssociationsBucketsDynamicAggregator { + + public DynamicFloatAggregator( + AssociationEnhancement 
associationEnhancement, + EnhancementsPayloadIterator payloadIterator, int numBuckets) { + super(associationEnhancement, payloadIterator, numBuckets); + } + + @Override + protected double getValue(int assoc) { + return Float.intBitsToFloat(assoc); + } + + } + + @Override + protected AssociationBucketsAggregator getDefinedAssociationBucketsAggregator( + AssociationEnhancement associationEnhancement, + EnhancementsPayloadIterator payloadIterator) { + return new DefinedFloatBucketsAggregator(associationEnhancement, + payloadIterator); + } + + class DefinedFloatBucketsAggregator extends AssociationsBucketsDefinedAggregator { + + public DefinedFloatBucketsAggregator( + AssociationEnhancement associationEnhancement, + EnhancementsPayloadIterator payloadIterator) { + super(associationEnhancement, payloadIterator, buckets); + } + + @Override + protected boolean bucketContainsAssociation(AssociationsBucket bucket, + int association) { + return ((AssociationsFloatBucket) bucket).contains(Float + .intBitsToFloat(association)); + } + + } +} Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationIntBucketsFacetRequest.java =================================================================== --- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationIntBucketsFacetRequest.java (revision 0) +++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationIntBucketsFacetRequest.java (revision 0) @@ -0,0 +1,120 @@ +package org.apache.lucene.facet.enhancements.association.buckets; + +import java.util.Collection; + +import org.apache.lucene.facet.enhancements.EnhancementsPayloadIterator; +import org.apache.lucene.facet.enhancements.association.AssociationEnhancement; +import org.apache.lucene.facet.enhancements.params.EnhancementsIndexingParams; +import org.apache.lucene.facet.search.FacetResultsHandler; +import org.apache.lucene.facet.search.params.FacetRequest; +import 
org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/** + * An {@link AssociationBucketsFacetRequest} for a category with int + * associations. + */ +public class AssociationIntBucketsFacetRequest extends + AssociationBucketsFacetRequest { + + /** + * Buckets created dynamically according to the number of requested buckets. + * + * @param path + * The category to find buckets for. + * @param numBuckets + * The maximal number of required buckets. + * @param enhancementsIndexingParams + * Indexing params to use. + */ + public AssociationIntBucketsFacetRequest(CategoryPath path, int numBuckets, + EnhancementsIndexingParams enhancementsIndexingParams) { + super(path, numBuckets, enhancementsIndexingParams); + } + + /** + * Using pre-defined buckets. + * + * @param path + * The category to find buckets for. + * @param buckets + * The pre-defined buckets to use. + * @param enhancementsIndexingParams + * Indexing params to use. + */ + public AssociationIntBucketsFacetRequest(CategoryPath path, + Collection buckets, + EnhancementsIndexingParams enhancementsIndexingParams) { + super(path, buckets, enhancementsIndexingParams); + } + + @Override + public FacetResultsHandler createFacetResultsHandler( + TaxonomyReader taxonomyReader) { + return new AssociationIntBucketsFacetResultsHandler(taxonomyReader, this, + aggregator); + } + + private class AssociationIntBucketsFacetResultsHandler extends + AssociationBucketsFacetResultsHandler { + + public AssociationIntBucketsFacetResultsHandler( + TaxonomyReader taxonomyReader, FacetRequest facetRequest, + AssociationBucketsAggregator aggregator) { + super(taxonomyReader, facetRequest, aggregator); + } + + @Override + protected AssociationsBucket getAssociationsBucket(double lowerBound, + double upperBound) { + return new AssociationsIntBucket((int) lowerBound, (int) upperBound); + } + + } + + @Override + protected AssociationBucketsAggregator 
getDynamicAssociationBucketsAggregator( + AssociationEnhancement associationEnhancement, + EnhancementsPayloadIterator payloadIterator, int numBuckets) { + return new DynamicIntAggregator(associationEnhancement, payloadIterator, + numBuckets); + } + + class DynamicIntAggregator extends AssociationsBucketsDynamicAggregator { + + public DynamicIntAggregator(AssociationEnhancement associationEnhancement, + EnhancementsPayloadIterator payloadIterator, int numBuckets) { + super(associationEnhancement, payloadIterator, numBuckets); + } + + @Override + protected double getValue(int assoc) { + return assoc; + } + + } + + @Override + protected AssociationBucketsAggregator getDefinedAssociationBucketsAggregator( + AssociationEnhancement associationEnhancement, + EnhancementsPayloadIterator payloadIterator) { + return new DefinedIntBucketsAggregator(associationEnhancement, + payloadIterator); + } + + class DefinedIntBucketsAggregator extends AssociationsBucketsDefinedAggregator { + + public DefinedIntBucketsAggregator( + AssociationEnhancement associationEnhancement, + EnhancementsPayloadIterator payloadIterator) { + super(associationEnhancement, payloadIterator, buckets); + } + + @Override + protected boolean bucketContainsAssociation(AssociationsBucket range, + int association) { + return ((AssociationsIntBucket) range).contains(association); + } + + } +} Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucket.java =================================================================== --- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucket.java (revision 0) +++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucket.java (revision 0) @@ -0,0 +1,30 @@ +package org.apache.lucene.facet.enhancements.association.buckets; + +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.NumericRangeFilter; +import 
org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * A bucket of association values, delimited by a lower and an upper bound.
+ */
+public interface AssociationsBucket {
+
+  /**
+   * Build a query restricted to the documents within this bucket's range.
+   *
+   * @param field
+   *          Name of the numeric field holding the values.
+   * @return A {@link NumericRangeQuery}.
+   */
+  Query getRangeQuery(String field);
+
+  /**
+   * Build a filter accepting only the documents within this bucket's range.
+   *
+   * @param field
+   *          Name of the numeric field holding the values.
+   * @return A {@link NumericRangeFilter}.
+   */
+  Filter getRangeFilter(String field);
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucketsDefinedAggregator.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucketsDefinedAggregator.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucketsDefinedAggregator.java (revision 0)
@@ -0,0 +1,77 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+import org.apache.lucene.facet.enhancements.EnhancementsPayloadIterator;
+import org.apache.lucene.facet.enhancements.association.AssociationEnhancement;
+import org.apache.lucene.util.collections.ObjectToIntMap;
+
+/**
+ * Aggregate associations into given pre-defined buckets.
+ */
+public abstract class AssociationsBucketsDefinedAggregator extends
+    AssociationBucketsAggregator {
+
+  /** Number of matching associations counted so far for each bucket. */
+  private final ObjectToIntMap<AssociationsBucket> bucketCounts;
+
+  /** Number of associations which matched none of the buckets. */
+  private int residue;
+
+  /**
+   * @param associationEnhancement
+   *          Passed to the payload iterator to read association values.
+   * @param payloadIterator
+   *          The iterator from which associations are read.
+   * @param buckets
+   *          The pre-defined buckets; each one starts with a count of zero.
+   */
+  public AssociationsBucketsDefinedAggregator(
+      AssociationEnhancement associationEnhancement,
+      EnhancementsPayloadIterator payloadIterator,
+      Collection<? extends AssociationsBucket> buckets) {
+    super(associationEnhancement, payloadIterator);
+    bucketCounts = new ObjectToIntMap<AssociationsBucket>(buckets.size());
+    for (AssociationsBucket associationBucket : buckets) {
+      bucketCounts.put(associationBucket, 0);
+    }
+  }
+
+  /**
+   * Counts the association read from the payload in every bucket containing
+   * it, or in the residue if no bucket does. Calls with an ordinal other than
+   * -1, or without an association in the payload, are ignored.
+   */
+  @Override
+  public void aggregate(int ordinal) {
+    if (ordinal != -1) {
+      return;
+    }
+    Integer associationObject = (Integer) payloadIterator
+        .getCategoryData(associationEnhancement);
+    if (associationObject == null) {
+      return;
+    }
+    int association = associationObject.intValue();
+
+    // TODO: a data structure which efficiently finds all buckets matching a
+    // given association would avoid this scan over every bucket
+    boolean found = false;
+    Iterator<AssociationsBucket> bucketsIterator = bucketCounts.keyIterator();
+    while (bucketsIterator.hasNext()) {
+      AssociationsBucket bucket = bucketsIterator.next();
+      if (bucketContainsAssociation(bucket, association)) {
+        bucketCounts.put(bucket, bucketCounts.get(bucket) + 1);
+        found = true;
+      }
+    }
+    if (!found) {
+      residue++;
+    }
+  }
+
+  /** Tells whether the given association falls into the given bucket. */
+  protected abstract boolean bucketContainsAssociation(
+      AssociationsBucket bucket, int association);
+
+  /** Get the map holding the current count of every pre-defined bucket. */
+  public ObjectToIntMap<AssociationsBucket> getBucketCounts() {
+    return bucketCounts;
+  }
+
+  /** Get the number of associations which matched none of the buckets. */
+  public int getResidue() {
+    return residue;
+  }
+
+  /** Not supported by this aggregator. */
+  protected double getValue(int assoc) {
+    throw new UnsupportedOperationException();
+  }
+
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucketsDynamicAggregator.java
===================================================================
---
contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucketsDynamicAggregator.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsBucketsDynamicAggregator.java (revision 0)
@@ -0,0 +1,212 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import java.util.Arrays;
+
+import org.apache.lucene.facet.enhancements.EnhancementsPayloadIterator;
+import org.apache.lucene.facet.enhancements.association.AssociationEnhancement;
+import org.apache.lucene.util.collections.IntIterator;
+import org.apache.lucene.util.collections.IntToIntMap;
+
+/**
+ * Aggregate into an {@link IntToIntMap} occurrence counts of different
+ * associations in the documents given, and lazily split the distinct
+ * association values into at most the requested number of buckets.
+ */
+public abstract class AssociationsBucketsDynamicAggregator extends
+    AssociationBucketsAggregator {
+
+  /** The maximal number of buckets to allocate. */
+  private int numBuckets;
+
+  /** Maps each distinct raw association to its number of occurrences. */
+  private final IntToIntMap map;
+
+  // Bucket arrays, generated on first access; all three have equal length.
+  private double[] lowerBounds;
+  private double[] upperBounds;
+  private int[] bucketCounts;
+
+  /**
+   * Constructor
+   *
+   * @param associationEnhancement
+   *          AssociationEnhancement to identify association values.
+   * @param payloadIterator
+   *          An iterator over the payloads of the requested category's term.
+   * @param numBuckets
+   *          The maximal number of buckets to allocate.
+   * @throws IllegalArgumentException
+   *           if numBuckets is negative.
+   */
+  public AssociationsBucketsDynamicAggregator(
+      AssociationEnhancement associationEnhancement,
+      EnhancementsPayloadIterator payloadIterator, int numBuckets) {
+    super(associationEnhancement, payloadIterator);
+    if (numBuckets < 0) {
+      throw new IllegalArgumentException(
+          "numBuckets must not be negative: " + numBuckets);
+    }
+    this.numBuckets = numBuckets;
+    this.map = new IntToIntMap();
+  }
+
+  /**
+   * Records one association. When the ordinal is -1 the association is read
+   * from the payload, and the call is ignored if the payload holds none; for
+   * any other ordinal an association of 0 is recorded.
+   */
+  @Override
+  public void aggregate(int ordinal) {
+    int association = 0;
+    if (ordinal == -1) {
+      Integer associationObject = (Integer) payloadIterator
+          .getCategoryData(associationEnhancement);
+      if (associationObject == null) {
+        return;
+      }
+      association = associationObject.intValue();
+    }
+    // NOTE(review): ordinals other than -1 count as association 0 here, while
+    // the defined-buckets aggregator ignores them - confirm this is intended.
+    addAssociation(association);
+  }
+
+  /**
+   * Add an association to the map.
+   *
+   * @param association
+   *          The association to add.
+   */
+  private void addAssociation(int association) {
+    int count = map.containsKey(association) ? map.get(association) : 0;
+    map.put(association, count + 1);
+    totalCount++;
+  }
+
+  /**
+   * Get an array containing the lower bounds of the bucket ranges.
+   */
+  double[] getLowerBounds() {
+    ensureBuckets();
+    return lowerBounds;
+  }
+
+  /**
+   * Get an array containing the upper bounds of the bucket ranges.
+   */
+  double[] getUpperBounds() {
+    ensureBuckets();
+    return upperBounds;
+  }
+
+  /**
+   * Get an array containing the number of associations in each bucket.
+   */
+  public int[] getBucketCounts() {
+    ensureBuckets();
+    return bucketCounts;
+  }
+
+  /**
+   * Generate the buckets on first access.
+   */
+  private void ensureBuckets() {
+    if (lowerBounds == null) {
+      generateBuckets();
+    }
+  }
+
+  /**
+   * Generate the buckets.
+   */
+  private void generateBuckets() {
+    double[] values = new double[map.size()];
+    int[] counts = new int[map.size()];
+    fillArrays(values, counts);
+
+    // sort by values
+    new ValuesAndCountsSorter(values, counts).sort();
+
+    // never allocate more buckets than there are distinct values
+    if (numBuckets > map.size()) {
+      numBuckets = map.size();
+    }
+    lowerBounds = new double[numBuckets];
+    upperBounds = new double[numBuckets];
+    bucketCounts = new int[numBuckets];
+    generateBuckets(values, counts);
+  }
+
+  /**
+   * Copy the distinct association values (as doubles) and their occurrence
+   * counts out of the map, in the map's iteration order.
+   */
+  private void fillArrays(double[] values, int[] counts) {
+    IntIterator iterator = map.keyIterator();
+    for (int i = 0; i < values.length; i++) {
+      int assoc = iterator.next();
+      values[i] = getValue(assoc);
+      counts[i] = map.get(assoc);
+    }
+  }
+
+  /**
+   * Walk the sorted values, closing the current bucket whenever the running
+   * count reaches that bucket's share of the total count.
+   */
+  private void generateBuckets(double[] values, int[] counts) {
+    if (numBuckets == 0) {
+      return;
+    }
+
+    Arrays.fill(lowerBounds, Double.NaN);
+    Arrays.fill(upperBounds, Double.NaN);
+    long bucketThreshold = totalCount / numBuckets;
+    int bucket = 0;
+    long countSum = 0;
+    int bucketCount = 0;
+    for (int i = 0; i < values.length; i++) {
+      if (Double.isNaN(lowerBounds[bucket])) {
+        lowerBounds[bucket] = values[i];
+      }
+      countSum += counts[i];
+      bucketCount += counts[i];
+      if (countSum >= bucketThreshold) {
+        upperBounds[bucket] = values[i];
+        bucketCounts[bucket] = bucketCount;
+        bucket++;
+        bucketCount = 0;
+        // widen before multiplying so that large totals cannot overflow
+        bucketThreshold = ((long) totalCount * (bucket + 1)) / numBuckets;
+      }
+    }
+
+    // A single value whose count spans several thresholds closes only one
+    // bucket, which leaves trailing slots unfilled (NaN bounds, zero count).
+    if (bucket < numBuckets) {
+      trimBuckets(bucket);
+    }
+  }
+
+  /**
+   * Shrink the bucket arrays to the buckets which were actually filled.
+   */
+  private void trimBuckets(int used) {
+    double[] lower = new double[used];
+    double[] upper = new double[used];
+    int[] usedCounts = new int[used];
+    System.arraycopy(lowerBounds, 0, lower, 0, used);
+    System.arraycopy(upperBounds, 0, upper, 0, used);
+    System.arraycopy(bucketCounts, 0, usedCounts, 0, used);
+    lowerBounds = lower;
+    upperBounds = upper;
+    bucketCounts = usedCounts;
+    numBuckets = used;
+  }
+
+  /**
+   * Get a double value for a given association
+   */
+  protected abstract double getValue(int assoc);
+
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsFloatBucket.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsFloatBucket.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsFloatBucket.java (revision 0)
@@ -0,0 +1,45 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.NumericRangeFilter;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * An {@link AssociationsBucket} with float bounds.
+ */
+public class AssociationsFloatBucket implements AssociationsBucket {
+
+  /** Bounds of the covered range; both of them are inclusive. */
+  private float lowerBound;
+  private float upperBound;
+
+  /** Creates a bucket covering the values from lowerBound to upperBound. */
+  public AssociationsFloatBucket(float lowerBound, float upperBound) {
+    this.lowerBound = lowerBound;
+    this.upperBound = upperBound;
+  }
+
+  /** Builds a float range query with both bounds inclusive. */
+  public Query getRangeQuery(String field) {
+    return NumericRangeQuery.newFloatRange(field, lowerBound, upperBound,
+        true, true);
+  }
+
+  /** Builds a float range filter with both bounds inclusive. */
+  public Filter getRangeFilter(String field) {
+    return NumericRangeFilter.newFloatRange(field, lowerBound, upperBound,
+        true, true);
+  }
+
+  /** Tells whether the association lies within the inclusive bounds. */
+  final boolean contains(float association) {
+    return association >= lowerBound && association <= upperBound;
+  }
+
+  /** Renders the bucket as {@code [lowerBound-upperBound]}. */
+  @Override
+  public String toString() {
+    return "[" + lowerBound + "-" + upperBound + "]";
+  }
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsIntBucket.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsIntBucket.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/AssociationsIntBucket.java (revision 0)
@@ -0,0 +1,45 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.NumericRangeFilter;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * An {@link AssociationsBucket} with int bounds.
+ */
+public class AssociationsIntBucket implements AssociationsBucket {
+
+  /** Bounds of the covered range; both of them are inclusive. */
+  private int lowerBound;
+  private int upperBound;
+
+  /** Creates a bucket covering the values from lowerBound to upperBound. */
+  public AssociationsIntBucket(int lowerBound, int upperBound) {
+    this.lowerBound = lowerBound;
+    this.upperBound = upperBound;
+  }
+
+  /** Builds an int range query with both bounds inclusive. */
+  public Query getRangeQuery(String field) {
+    return NumericRangeQuery.newIntRange(field, lowerBound, upperBound,
+        true, true);
+  }
+
+  /** Builds an int range filter with both bounds inclusive. */
+  public Filter getRangeFilter(String field) {
+    return NumericRangeFilter.newIntRange(field, lowerBound, upperBound,
+        true, true);
+  }
+
+  /** Tells whether the association lies within the inclusive bounds. */
+  final boolean contains(int association) {
+    return association >= lowerBound && association <= upperBound;
+  }
+
+  /** Renders the bucket as {@code [lowerBound-upperBound]}. */
+  @Override
+  public String toString() {
+    return "[" + lowerBound + "-" + upperBound + "]";
+  }
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/ValuesAndCountsSorter.java
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/ValuesAndCountsSorter.java (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/ValuesAndCountsSorter.java (revision 0)
@@ -0,0 +1,82 @@
+package org.apache.lucene.facet.enhancements.association.buckets;
+
+/**
+ * Sorts an array of unique double values, keeping the counts in the counts
+ * array matching the values. After calling {@link #sort()} the sorter can not
+ * be used again.
+ */
+class ValuesAndCountsSorter {
+
+  private double[] values;
+  private int[] counts;
+
+  /** Size of the heap prefix still to be sorted; shrinks while sorting. */
+  private int heapSize;
+
+  /**
+   * Wraps the given arrays, which are sorted in place.
+   * @throws IllegalArgumentException
+   *           if the arrays differ in length.
+   */
+  public ValuesAndCountsSorter(double[] values, int[] counts) {
+    if (values.length != counts.length) {
+      throw new IllegalArgumentException("Arrays must be of same length");
+    }
+    this.values = values;
+    this.counts = counts;
+    heapSize = values.length;
+  }
+
+  /** Heap-sorts the values ascending, moving each count with its value. */
+  public void sort() {
+    buildHeap();
+    drainHeap();
+  }
+
+  /** Sifts down every inner node, the last one first. */
+  private void buildHeap() {
+    for (int node = heapSize / 2 - 1; node >= 0; node--) {
+      fixDown(node);
+    }
+  }
+
+  /** Repeatedly moves the heap's root to the slot just behind the heap. */
+  private void drainHeap() {
+    heapSize--;
+    while (heapSize > 0) {
+      swap(0, heapSize);
+      fixDown(0);
+      heapSize--;
+    }
+  }
+
+  /** Exchanges two positions in both arrays. */
+  private void swap(int index1, int index2) {
+    double value = values[index1];
+    values[index1] = values[index2];
+    values[index2] = value;
+    int count = counts[index1];
+    counts[index1] = counts[index2];
+    counts[index2] = count;
+  }
+
+  /** Sifts the value at the given index down to its place in the heap. */
+  private void fixDown(int index) {
+    int parent = index;
+    int left = 2 * parent + 1;
+    while (left < heapSize) {
+      // on ties a son wins over its parent, and the right son over the left
+      int largest = values[parent] <= values[left] ? left : parent;
+      int right = left + 1;
+      if (right < heapSize && values[largest] <= values[right]) {
+        largest = right;
+      }
+      if (largest == parent) {
+        return;
+      }
+      swap(parent, largest);
+      parent = largest;
+      left = 2 * parent + 1;
+    }
+  }
+}
Index: contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/package.html
===================================================================
--- contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/package.html (revision 0)
+++ contrib/facet/src/java/org/apache/lucene/facet/enhancements/association/buckets/package.html (revision 0)
@@ -0,0 +1,18 @@
+
+
+Association buckets
+
+

Association buckets

+ +This package allows creating +{@link org.apache.lucene.facet.enhancements.association.buckets.AssociationBucketsFacetRequest AssociationBucketsFacetRequest} +which finds all associations of a given category path, and puts them in +buckets by association value. There are two ways to determine the association +range covered by each bucket. The first is by providing a list of ranges along +with the facet request. The other is by stating the number of requested buckets, +and allowing dynamic bucket allocation which aims at providing buckets of +similar size. + + + \ No newline at end of file Index: contrib/facet/src/test/org/apache/lucene/facet/enhancements/association/TestAssociationBucketsExample.java =================================================================== --- contrib/facet/src/test/org/apache/lucene/facet/enhancements/association/TestAssociationBucketsExample.java (revision 0) +++ contrib/facet/src/test/org/apache/lucene/facet/enhancements/association/TestAssociationBucketsExample.java (revision 0) @@ -0,0 +1,137 @@ +package org.apache.lucene.facet.enhancements.association; + +import java.util.List; + +import org.apache.lucene.facet.enhancements.association.buckets.AssociationsBucket; +import org.apache.lucene.facet.enhancements.association.buckets.AssociationBucketsFacetResultNode; +import org.apache.lucene.facet.example.ExampleResult; +import org.apache.lucene.facet.example.association.AssociationUtils; +import org.apache.lucene.facet.example.association.buckets.AssociationBucketsMain; +import org.apache.lucene.facet.example.simple.SimpleSearcher; +import org.apache.lucene.facet.example.simple.SimpleUtils; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import
org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +/** + * Test that the association numerics works as expected. + */ +public class TestAssociationBucketsExample extends LuceneTestCase { + private static final double[][] EXPECTED_DYNAMIC_RESULTS = { + { 3d, 4d, 2d }, { 5d, 5d, 6d } }; + + private static final double[][] EXPECTED_DEFINED_RESULTS = { { 2d, 2d }, + { 18d, 14d } }; + private static final int[] EXPECTED_DEFINED_RESIDUE = { 14, 0 }; + + @Test + public void testWithNumericFields() throws Exception { + // create Directories for the search index and for the taxonomy index + Directory indexDir = new RAMDirectory(); + Directory taxoDir = new RAMDirectory(); + + ExampleResult res = new AssociationBucketsMain().runDynamicSample( + indexDir, taxoDir, true); + assertExampleResult(res, EXPECTED_DYNAMIC_RESULTS, new int[] { 0, 0 }); + assertNumericRangeQueries(indexDir, taxoDir, res, + EXPECTED_DYNAMIC_RESULTS); + + res = new AssociationBucketsMain().runDefinedSample(indexDir, taxoDir, + true); + assertExampleResult(res, EXPECTED_DEFINED_RESULTS, + EXPECTED_DEFINED_RESIDUE); + assertNumericRangeQueries(indexDir, taxoDir, res, + EXPECTED_DEFINED_RESULTS); + } + + @Test + public void testWithoutNumericFields() throws Exception { + // create Directories for the search index and for the taxonomy index + Directory indexDir = new RAMDirectory(); + Directory taxoDir = new RAMDirectory(); + + ExampleResult res = new AssociationBucketsMain().runDynamicSample( + indexDir, taxoDir, false); + assertExampleResult(res, EXPECTED_DYNAMIC_RESULTS, new int[] { 0, 0 }); + + res = new 
AssociationBucketsMain().runDefinedSample(indexDir, taxoDir, + false); + assertExampleResult(res, EXPECTED_DEFINED_RESULTS, + EXPECTED_DEFINED_RESIDUE); + } + + private void assertExampleResult(ExampleResult res, + double[][] expectedResults, int[] expectedResidue) { + assertNotNull("Null result!", res); + List facetResults = res.getFacetResults(); + assertNotNull("Null facet result!", facetResults); + assertEquals("Wrong number of results!", expectedResults.length, + facetResults.size()); + + for (int i = 0; i < expectedResults.length; i++) { + assertEquals("Wrong number of facets!", expectedResults[i].length, + facetResults.get(i).getNumValidDescendants()); + + Iterable it = facetResults.get(i) + .getFacetResultNode().getSubResults(); + int j = 0; + for (FacetResultNode fResNode : it) { + assertEquals("Wrong result for facet " + fResNode.getLabel(), + expectedResults[i][j++], fResNode.getValue(), 1E-5); + } + } + } + + private void assertNumericRangeQueries(Directory indexDir, + Directory taxoDir, ExampleResult res, double[][] expectedResults) + throws Exception { + List facetResults = res.getFacetResults(); + IndexReader indexReader = IndexReader.open(indexDir); + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); + + for (int i = 0; i < facetResults.size(); i++) { + FacetResultNode facetResultNode = facetResults.get(i) + .getFacetResultNode(); + Iterable it = facetResultNode + .getSubResults(); + int j = 0; + for (FacetResultNode fResNode : it) { + CategoryPath label = fResNode.getLabel(); + String field = label.getComponent(0); + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(new TermQuery(new Term(SimpleUtils.TEXT, + "white")), Occur.MUST); + AssociationsBucket range = ((AssociationBucketsFacetResultNode) fResNode) + .getBucket(); + booleanQuery.add(range.getRangeQuery(field), Occur.MUST); + + List countForRangeQuery = SimpleSearcher + .searchWithRequestAndQuery(booleanQuery, indexReader, + taxoReader, + 
AssociationUtils.assocIndexingParams, + new CountFacetRequest(new CategoryPath(field), + 100)); + assertNotNull("Null facet result!", countForRangeQuery); + assertEquals("Wrong number of results!", 1, + countForRangeQuery.size()); + FacetResult facetResult = countForRangeQuery.get(0); + FacetResultNode countResNode = facetResult.getFacetResultNode(); + assertEquals("Wrong result for NumericRangeQuery of facet " + + fResNode.getLabel(), expectedResults[i][j++], + countResNode.getValue(), 1E-5); + } + } + } +} Index: contrib/facet/src/test/org/apache/lucene/facet/enhancements/association/buckets/ValuesAndCountsSorterTest.java =================================================================== --- contrib/facet/src/test/org/apache/lucene/facet/enhancements/association/buckets/ValuesAndCountsSorterTest.java (revision 0) +++ contrib/facet/src/test/org/apache/lucene/facet/enhancements/association/buckets/ValuesAndCountsSorterTest.java (revision 0) @@ -0,0 +1,59 @@ +package org.apache.lucene.facet.enhancements.association.buckets; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +public class ValuesAndCountsSorterTest extends LuceneTestCase { + + @Test + public void test1() { + double[] values = new double[] { 1.3, 0, 4, -37.11, Math.E, Math.PI, + 3.14 }; + int[] counts = new int[] { 4, 7, 2, 8, 1, 2, 3 }; + Map map = new HashMap(); + for (int i = 0; i < values.length; i++) { + map.put(values[i], counts[i]); + } + testSorter(values, counts, map, ""); + } + + @Test + public void testRandom() { + long seed = System.currentTimeMillis(); + random.setSeed(seed ); + int length = 1000; + double[] values = new double[length]; + int[] counts = new int[length]; + Map map = new HashMap(); + while (map.size() < length) { + double value = random.nextDouble(); + // make sure we get unique double values + if (!map.containsKey(value)) { + int count = random.nextInt(200); + values[map.size()] = value; + 
counts[map.size()] = count; + map.put(value, count); + } + } + testSorter(values, counts, map, "(seed " + seed + ")"); + } + + private void testSorter(double[] values, int[] counts, + Map map, String prefix) { + new ValuesAndCountsSorter(values, counts).sort(); + for (int i = 0; i < values.length; i++) { + assertEquals(prefix + "count not fitting value", map.get(values[i]) + .intValue(), counts[i]); + } + for (int i = 0; i < values.length - 1; i++) { + if (values[i] > values[i + 1]) { + fail(prefix + "wrong ordering at location " + i + " (" + + values[i] + " before " + values[i + 1] + ")"); + } + } + } + +}