Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java --- lucene/src/java/org/apache/lucene/search/FieldComparator.java Sat Jun 04 12:14:05 2011 -0400 +++ lucene/src/java/org/apache/lucene/search/FieldComparator.java Sat Jun 04 12:18:46 2011 -0400 @@ -657,8 +657,13 @@ @Override public void setScorer(Scorer scorer) { // wrap with a ScoreCachingWrappingScorer so that successive calls to - // score() will not incur score computation over and over again. - this.scorer = new ScoreCachingWrappingScorer(scorer); + // score() will not incur score computation over and + // over again. + if (!(scorer instanceof ScoreCachingWrappingScorer)) { + this.scorer = new ScoreCachingWrappingScorer(scorer); + } else { + this.scorer = scorer; + } } @Override Index: lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java --- lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java Sat Jun 04 12:14:05 2011 -0400 +++ lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java Sat Jun 04 12:18:46 2011 -0400 @@ -31,12 +31,12 @@ * @see IndexSearcher#search(Query,Filter,int,Sort) * @see FieldCache */ -public abstract class FieldValueHitQueue extends PriorityQueue { +public abstract class FieldValueHitQueue extends PriorityQueue { - final static class Entry extends ScoreDoc { - int slot; + public static class Entry extends ScoreDoc { + public int slot; - Entry(int slot, int doc, float score) { + public Entry(int slot, int doc, float score) { super(doc, score); this.slot = slot; } @@ -51,7 +51,7 @@ * An implementation of {@link FieldValueHitQueue} which is optimized in case * there is just one comparator. 
*/ - private static final class OneComparatorFieldValueHitQueue extends FieldValueHitQueue { + private static final class OneComparatorFieldValueHitQueue extends FieldValueHitQueue { private final int oneReverseMul; public OneComparatorFieldValueHitQueue(SortField[] fields, int size) @@ -92,7 +92,7 @@ * An implementation of {@link FieldValueHitQueue} which is optimized in case * there is more than one comparator. */ - private static final class MultiComparatorsFieldValueHitQueue extends FieldValueHitQueue { + private static final class MultiComparatorsFieldValueHitQueue extends FieldValueHitQueue { public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size) throws IOException { @@ -156,24 +156,28 @@ * The number of hits to retain. Must be greater than zero. * @throws IOException */ - public static FieldValueHitQueue create(SortField[] fields, int size) throws IOException { + public static FieldValueHitQueue create(SortField[] fields, int size) throws IOException { if (fields.length == 0) { throw new IllegalArgumentException("Sort must contain at least one field"); } if (fields.length == 1) { - return new OneComparatorFieldValueHitQueue(fields, size); + return new OneComparatorFieldValueHitQueue(fields, size); } else { - return new MultiComparatorsFieldValueHitQueue(fields, size); + return new MultiComparatorsFieldValueHitQueue(fields, size); } } - FieldComparator[] getComparators() { return comparators; } + public FieldComparator[] getComparators() { + return comparators; + } - int[] getReverseMul() { return reverseMul; } + public int[] getReverseMul() { + return reverseMul; + } - protected void setComparator(int pos, FieldComparator comparator) { + public void setComparator(int pos, FieldComparator comparator) { if (pos==0) firstComparator = comparator; comparators[pos] = comparator; } Index: lucene/src/java/org/apache/lucene/search/Scorer.java --- lucene/src/java/org/apache/lucene/search/Scorer.java Sat Jun 04 12:14:05 2011 -0400 +++ 
lucene/src/java/org/apache/lucene/search/Scorer.java Sat Jun 04 12:18:46 2011 -0400 @@ -162,7 +162,7 @@ * * @lucene.experimental */ - protected void visitSubScorers(Query parent, Occur relationship, + public void visitSubScorers(Query parent, Occur relationship, ScorerVisitor visitor) { if (weight == null) throw new UnsupportedOperationException(); Index: lucene/src/java/org/apache/lucene/util/ArrayUtil.java --- lucene/src/java/org/apache/lucene/util/ArrayUtil.java Sat Jun 04 12:14:05 2011 -0400 +++ lucene/src/java/org/apache/lucene/util/ArrayUtil.java Sat Jun 04 12:18:46 2011 -0400 @@ -380,6 +380,56 @@ return array; } + public static int[][] grow(int[][] array, int minSize) { + if (array.length < minSize) { + int[][] newArray = new int[oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else { + return array; + } + } + + public static int[][] grow(int[][] array) { + return grow(array, 1 + array.length); + } + + public static int[][] shrink(int[][] array, int targetSize) { + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + if (newSize != array.length) { + int[][] newArray = new int[newSize][]; + System.arraycopy(array, 0, newArray, 0, newSize); + return newArray; + } else { + return array; + } + } + + public static float[][] grow(float[][] array, int minSize) { + if (array.length < minSize) { + float[][] newArray = new float[oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else { + return array; + } + } + + public static float[][] grow(float[][] array) { + return grow(array, 1 + array.length); + } + + public static float[][] shrink(float[][] array, int targetSize) { + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + if (newSize != array.length) { + float[][] newArray = new 
float[newSize][]; + System.arraycopy(array, 0, newArray, 0, newSize); + return newArray; + } else { + return array; + } + } + /** * Returns hash of chars in range start (inclusive) to * end (inclusive) @@ -604,6 +654,7 @@ */ public static void mergeSort(T[] a, int fromIndex, int toIndex, Comparator comp) { if (toIndex-fromIndex <= 1) return; + //System.out.println("SORT: " + (toIndex-fromIndex)); getSorter(a, comp).mergeSort(fromIndex, toIndex-1); } Index: lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java --- lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Sat Jun 04 12:14:05 2011 -0400 +++ lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Sat Jun 04 12:18:46 2011 -0400 @@ -35,6 +35,7 @@ import java.util.zip.ZipFile; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; @@ -420,4 +421,25 @@ newName.append(suffix); return new File(directory, newName.toString()); } + + // NOTE: this is likely buggy, and cannot clone fields + // with tokenStreamValues, etc. Use at your own risk!! + + // TODO: is there a pre-existing way to do this!!! + public static Document cloneDocument(Document doc1) { + final Document doc2 = new Document(); + for(Fieldable f : doc1.getFields()) { + Field field1 = (Field) f; + + Field field2 = new Field(field1.name(), + field1.stringValue(), + field1.isStored() ? Field.Store.YES : Field.Store.NO, + field1.isIndexed() ? (field1.isTokenized() ? 
Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO); + field2.setOmitNorms(field1.getOmitNorms()); + field2.setOmitTermFreqAndPositions(field1.getOmitTermFreqAndPositions()); + doc2.add(field2); + } + + return doc2; + } } Index: lucene/src/test/org/apache/lucene/index/TestNRTThreads.java --- lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Sat Jun 04 12:14:05 2011 -0400 +++ lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Sat Jun 04 12:18:46 2011 -0400 @@ -70,28 +70,6 @@ } } - // TODO: is there a pre-existing way to do this!!! - private Document cloneDoc(Document doc1) { - final Document doc2 = new Document(); - for(Fieldable f : doc1.getFields()) { - Field field1 = (Field) f; - - Field field2 = new Field(field1.name(), - field1.stringValue(), - field1.isStored() ? Field.Store.YES : Field.Store.NO, - field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO); - if (field1.getOmitNorms()) { - field2.setOmitNorms(true); - } - if (field1.getOmitTermFreqAndPositions()) { - field2.setOmitTermFreqAndPositions(true); - } - doc2.add(field2); - } - - return doc2; - } - @Test public void testNRTThreads() throws Exception { @@ -217,7 +195,7 @@ allSubDocs.add(subDocs); doc.add(packIDField); - docsList.add(cloneDoc(doc)); + docsList.add(_TestUtil.cloneDocument(doc)); docIDs.add(doc.get("docid")); final int maxDocCount = _TestUtil.nextInt(random, 1, 10); @@ -226,7 +204,7 @@ if (doc == null) { break; } - docsList.add(cloneDoc(doc)); + docsList.add(_TestUtil.cloneDocument(doc)); docIDs.add(doc.get("docid")); } addCount.addAndGet(docsList.size()); Index: modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java --- modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java Sat Jun 04 12:14:05 2011 -0400 +++ modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java Sat Jun 04 12:18:46 2011 -0400 @@ -38,16 +38,21 @@ /** Total hits within this 
group */ public final int totalHits; + // nocommit better name; or, maybe, make this the groupValue? + public final int groupDoc; + /** Matches the groupSort passed to {@link * AbstractFirstPassGroupingCollector}. */ public final Comparable[] groupSortValues; public GroupDocs(float maxScore, int totalHits, + int groupDoc, ScoreDoc[] scoreDocs, GROUP_VALUE_TYPE groupValue, Comparable[] groupSortValues) { this.maxScore = maxScore; + this.groupDoc = groupDoc; this.totalHits = totalHits; this.scoreDocs = scoreDocs; this.groupValue = groupValue; Index: modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java Sat Jun 04 12:18:46 2011 -0400 @@ -0,0 +1,418 @@ +package org.apache.lucene.search.join; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.FieldValueHitQueue; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreCachingWrappingScorer; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.grouping.GroupDocs; +import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.util.ArrayUtil; + + +// nocommit jdocs +public class BlockJoinCollector extends Collector { + + private final Sort sort; + + // Maps each BlockJoinQuery instance to its "slot" in + // joinScorers and in OneGroup's cached doc/scores/count: + private final Map joinQueryID = new HashMap(); + private final int numParentHits; + private final FieldValueHitQueue queue; + private final FieldComparator[] comparators; + private final int[] reverseMul; + private final int compEnd; + private final boolean trackMaxScore; + private final boolean trackScores; + + private int docBase; + private BlockJoinQuery.BlockJoinScorer[] joinScorers = new BlockJoinQuery.BlockJoinScorer[0]; + private IndexReader.AtomicReaderContext currentReaderContext; + private Scorer scorer; + private boolean queueFull; + + private OneGroup bottom; + private int totalHitCount; + private float maxScore = Float.NaN; + + // nocommit: accept null sort to mean "by relevance" + + // nocommit: should we require caller to provide 
number of + // BlockJoinQueries up front...? + public BlockJoinCollector(Sort sort, int numParentHits, boolean trackScores, boolean trackMaxScore) throws IOException { + this.sort = sort; + this.trackMaxScore = trackMaxScore; + this.trackScores = trackScores; + this.numParentHits = numParentHits; + queue = FieldValueHitQueue.create(sort.getSort(), numParentHits); + comparators = queue.getComparators(); + reverseMul = queue.getReverseMul(); + compEnd = comparators.length - 1; + } + + private static final class OneGroup extends FieldValueHitQueue.Entry { + public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) { + super(comparatorSlot, parentDoc, parentScore); + docs = new int[numJoins][]; + for(int joinID=0;joinID 0) { + // Definitely competitive. + break; + } else if (i == compEnd) { + // Here c=0. If we're at the last comparator, this doc is not + // competitive, since docs are visited in doc Id order, which means + // this doc cannot compete with any other document in the queue. 
+ //System.out.println(" skip"); + return; + } + } + + //System.out.println(" competes!"); + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(bottom.slot, parentDoc); + } + if (!trackMaxScore && trackScores) { + score = scorer.score(); + } + bottom.doc = docBase + parentDoc; + bottom.score = score; + bottom = queue.updateTop(); + copyGroups(bottom); + + for (int i = 0; i < comparators.length; i++) { + comparators[i].setBottom(bottom.slot); + } + } else { + // Startup transient: queue is not yet full: + final int comparatorSlot = totalHitCount - 1; + + // Copy hit into queue + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(comparatorSlot, parentDoc); + } + //System.out.println(" startup: new OG doc=" + (docBase+parentDoc)); + final OneGroup og = new OneGroup(comparatorSlot, docBase+parentDoc, score, joinScorers.length, trackScores); + og.readerContext = currentReaderContext; + copyGroups(og); + bottom = queue.add(og); + queueFull = totalHitCount == numParentHits; + if (queueFull) { + // End of startup transient: queue just filled up: + for (int i = 0; i < comparators.length; i++) { + comparators[i].setBottom(bottom.slot); + } + } + } + } + + // Pulls out child doc and scores for all join queries: + private void copyGroups(OneGroup og) { + // While rare, it's possible top arrays could be too + // short if join query had null scorer on first + // segment(s) but then became non-null on later segments + final int numSubScorers = joinScorers.length; + if (og.docs.length < numSubScorers) { + // While rare, this could happen if join query had + // null scorer on first segment(s) but then became + // non-null on later segments + og.docs = ArrayUtil.grow(og.docs); + } + if (og.counts.length < numSubScorers) { + og.counts = ArrayUtil.grow(og.counts); + } + if (trackScores && og.scores.length < numSubScorers) { + og.scores = ArrayUtil.grow(og.scores); + } + + 
//System.out.println("copyGroups"); + for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) { + final BlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX]; + //System.out.println(" scorer=" + joinScorer); + if (joinScorer != null) { + og.counts[scorerIDX] = joinScorer.getChildCount(); + //System.out.println(" count=" + og.counts[scorerIDX]); + og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]); + /* + for(int idx=0;idx() { + private void enroll(BlockJoinQuery query, BlockJoinQuery.BlockJoinScorer scorer) { + final Integer slot = joinQueryID.get(query); + if (slot == null) { + joinQueryID.put(query, joinScorers.length); + //System.out.println("found JQ: " + query + " slot=" + joinScorers.length); + final BlockJoinQuery.BlockJoinScorer[] newArray = new BlockJoinQuery.BlockJoinScorer[1+joinScorers.length]; + System.arraycopy(joinScorers, 0, newArray, 0, joinScorers.length); + joinScorers = newArray; + joinScorers[joinScorers.length-1] = scorer; + } else { + joinScorers[slot] = scorer; + } + } + + @Override + public void visitOptional(Query parent, Query child, Scorer scorer) { + //System.out.println("visitOpt"); + if (child instanceof BlockJoinQuery) { + enroll((BlockJoinQuery) child, + (BlockJoinQuery.BlockJoinScorer) scorer); + } + } + + @Override + public void visitRequired(Query parent, Query child, Scorer scorer) { + //System.out.println("visitReq parent=" + parent + " child=" + child + " scorer=" + scorer); + if (child instanceof BlockJoinQuery) { + enroll((BlockJoinQuery) child, + (BlockJoinQuery.BlockJoinScorer) scorer); + } + } + + @Override + public void visitProhibited(Query parent, Query child, Scorer scorer) { + //System.out.println("visitProh"); + if (child instanceof BlockJoinQuery) { + enroll((BlockJoinQuery) child, + (BlockJoinQuery.BlockJoinScorer) scorer); + } + } + }); + } + + private final static class FakeScorer extends Scorer { + + float score; + int doc; + + public FakeScorer() { + super((Weight) null); + } + 
+ @Override + public float score() { + return score; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int advance(int target) { + throw new UnsupportedOperationException(); + } + + @Override + public int nextDoc() { + throw new UnsupportedOperationException(); + } + } + + private OneGroup[] sortedGroups; + + private void sortQueue() { + sortedGroups = new OneGroup[queue.size()]; + for(int downTo=queue.size()-1;downTo>=0;downTo--) { + sortedGroups[downTo] = queue.pop(); + } + } + + // nocommit jdocs + public TopGroups getTopGroups(BlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, boolean fillSortFields) + + throws IOException { + + final Integer _slot = joinQueryID.get(query); + if (_slot == null) { + throw new IllegalArgumentException("the provided BlockJoinQuery was not involved in the search"); + } + + // unbox once + final int slot = _slot; + + if (offset >= queue.size()) { + return null; + } + int totalGroupedHitCount = 0; + + if (sortedGroups == null) { + sortQueue(); + } + + final FakeScorer fakeScorer = new FakeScorer(); + + final GroupDocs[] groups = new GroupDocs[sortedGroups.length - offset]; + + for(int groupIDX=offset;groupIDXOnce the block index is built, use this query to wrap + * any sub-Query against child docs and join matches in that + * sub query up to the docID space of the parent + * documents. You can then use this Query as a clause with + * other queries in the parent docID space.

+ *
+ * <p>The child documents must be orthogonal to the parent
+ * documents.  That is, the wrapped child query must never
+ * return a parent docID.</p>
+ *
+ * <p>See {@link org.apache.lucene.search.join} for a full
+ * code sample.</p>

+ */ + +// nocommit code sample + +public class BlockJoinQuery extends Query { + + public static enum ScoreMode {None, Avg, Max, Total}; + + // nocommit -- pass to child instances to avoid access + private final Filter parentsFilter; + private final Query childQuery; + private final ScoreMode scoreMode; + + // nocommit -- can we require this is reversed (by docID) parent filter...? + public BlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode) { + super(); + this.childQuery = childQuery; + this.parentsFilter = parentsFilter; + this.scoreMode = scoreMode; + } + + public Weight createWeight(IndexSearcher searcher) throws IOException { + return new BlockJoinWeight(this, childQuery.weight(searcher), parentsFilter, scoreMode); + } + + private static class BlockJoinWeight extends Weight { + private final Query joinQuery; + private final Weight childWeight; + private final Filter parentsFilter; + private final ScoreMode scoreMode; + + public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode) { + super(); + this.joinQuery = joinQuery; + this.childWeight = childWeight; + this.parentsFilter = parentsFilter; + this.scoreMode = scoreMode; + } + + @Override + public Query getQuery() { + return joinQuery; + } + + @Override + public float getValue() { + return childWeight.getValue(); + } + + @Override + public float sumOfSquaredWeights() throws IOException { + return childWeight.sumOfSquaredWeights(); + } + + @Override + public void normalize(float norm) { + childWeight.normalize(norm); + } + + @Override + public Scorer scorer(AtomicReaderContext readerContext, ScorerContext context) throws IOException { + // Pass scoreDocsInOrder true, topScorer false to our sub: + final Scorer childScorer = childWeight.scorer(readerContext, ScorerContext.def().scoreDocsInOrder(true).topScorer(false)); + + if (childScorer == null) { + // No matches + return null; + } + + final int firstChildDoc = childScorer.nextDoc(); + if 
(firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) { + // No matches + return null; + } + + final DocIdSet parents = parentsFilter.getDocIdSet(readerContext); + // TODO: once we do random-access filters we can + // generalize this: + if (parents == null) { + // No matches + return null; + } + if (!(parents instanceof OpenBitSet)) { + throw new IllegalStateException("parentFilter must return OpenBitSet; got " + parents); + } + + return new BlockJoinScorer(this, childScorer, (OpenBitSet) parents, firstChildDoc, scoreMode); + } + + @Override + public Explanation explain(AtomicReaderContext reader, int doc) throws IOException { + // TODO + throw new UnsupportedOperationException(getClass().getName() + + " cannot explain match on parent document"); + } + + @Override + public boolean scoresDocsOutOfOrder() { + return false; + } + } + + static class BlockJoinScorer extends Scorer { + private final Scorer childScorer; + private final OpenBitSet parentBits; + private final ScoreMode scoreMode; + private int parentDoc; + private float parentScore; + private int nextChildDoc; + private int currentChild; + private float nextScore; + + private int[] pendingChildDocs = new int[5]; + private float[] pendingChildScores; + private int childDocUpto; + + public BlockJoinScorer(Weight weight, Scorer childScorer, OpenBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) { + super(weight); + // nocommit -- maybe pull DISI? 
+ //System.out.println("Q.init firstChildDoc=" + firstChildDoc); + this.parentBits = parentBits; + this.childScorer = childScorer; + this.scoreMode = scoreMode; + if (scoreMode != ScoreMode.None) { + pendingChildScores = new float[5]; + } + nextChildDoc = firstChildDoc; + } + + @Override + public void visitSubScorers(Query parent, BooleanClause.Occur relationship, + ScorerVisitor visitor) { + super.visitSubScorers(parent, relationship, visitor); + childScorer.visitSubScorers(weight.getQuery(), BooleanClause.Occur.MUST, visitor); + } + + int getChildCount() { + return childDocUpto; + } + + int[] swapChildDocs(int[] other) { + final int[] ret = pendingChildDocs; + if (other == null) { + pendingChildDocs = new int[5]; + } else { + pendingChildDocs = other; + } + return ret; + } + + float[] swapChildScores(float[] other) { + if (scoreMode == ScoreMode.None) { + throw new IllegalStateException("ScoreMode is None"); + } + final float[] ret = pendingChildScores; + if (other == null) { + pendingChildScores = new float[5]; + } else { + pendingChildScores = other; + } + return ret; + } + + @Override + public int nextDoc() throws IOException { + //System.out.println("Q.nextDoc()"); + + if (nextChildDoc == NO_MORE_DOCS) { + //System.out.println(" end"); + return parentDoc = NO_MORE_DOCS; + } + + // Gather all children sharing the same parent as nextChildDoc + parentDoc = parentBits.nextSetBit(nextChildDoc); + //System.out.println(" parentDoc=" + parentDoc); + assert parentDoc != -1; + + int numSiblingsOrParents = 1; + float totalScore = 0; + float maxScore = Float.NEGATIVE_INFINITY; + + childDocUpto = 0; + do { + //System.out.println(" c=" + nextChildDoc); + if (pendingChildDocs.length == childDocUpto) { + pendingChildDocs = ArrayUtil.grow(pendingChildDocs); + if (scoreMode != ScoreMode.None) { + pendingChildScores = ArrayUtil.grow(pendingChildScores); + } + } + pendingChildDocs[childDocUpto] = nextChildDoc; + if (scoreMode != ScoreMode.None) { + // TODO: specialize this into 
dedicated classes per-scoreMode + final float childScore = childScorer.score(); + pendingChildScores[childDocUpto] = childScore; + maxScore = Math.max(childScore, maxScore); + totalScore += childScore; + } + childDocUpto++; + nextChildDoc = childScorer.nextDoc(); + } while (nextChildDoc < parentDoc); + //System.out.println(" nextChildDoc=" + nextChildDoc); + + // Parent & child docs are supposed to be orthogonal: + assert nextChildDoc != parentDoc; + + switch(scoreMode) { + case Avg: + parentScore = totalScore / childDocUpto; + break; + case Max: + parentScore = maxScore; + break; + case Total: + parentScore = totalScore; + break; + case None: + break; + } + + //System.out.println(" return parentDoc=" + parentDoc); + return parentDoc; + } + + @Override + public int docID() { + return parentDoc; + } + + @Override + public float score() throws IOException { + return parentScore; + } + + @Override + public int advance(int parentTarget) throws IOException { + + //System.out.println("Q.advance parentTarget=" + parentTarget); + if (parentTarget == NO_MORE_DOCS) { + return parentDoc = NO_MORE_DOCS; + } + + // TODO: need a prevSetBit in obs! 
+ int prevParentDoc = parentTarget-1; + while(prevParentDoc > 0 && !parentBits.fastGet(prevParentDoc)) { + prevParentDoc--; + } + //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc); + assert prevParentDoc >= parentDoc; + if (prevParentDoc > nextChildDoc) { + nextChildDoc = childScorer.advance(prevParentDoc); + //System.out.println(" childScorer advanced to child docID=" + nextChildDoc); + } else { + //System.out.println(" skip childScorer advance"); + } + + // Parent & child docs are supposed to be orthogonal: + assert nextChildDoc != prevParentDoc; + + final int nd = nextDoc(); + //System.out.println(" return nextParentDoc=" + nd); + return nd; + } + } + + @Override + public void extractTerms(Set terms) { + childQuery.extractTerms(terms); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + return this; + } + + @Override + public String toString(String field) { + return "BlockJoinQuery ("+childQuery.toString()+")"; + } +} Index: modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java Sat Jun 04 12:18:46 2011 -0400 @@ -0,0 +1,375 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.grouping.GroupDocs; +import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.search.join.BlockJoinCollector; +import org.apache.lucene.search.join.BlockJoinQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestBlockJoin extends LuceneTestCase { + + // One resume... + private Document makeResume(String name, String country) { + Document resume = new Document(); + resume.add(newField("docType", "resume", Field.Index.NOT_ANALYZED)); + resume.add(newField("name", name, Field.Store.YES, Field.Index.NOT_ANALYZED)); + resume.add(newField("country", country, Field.Index.NOT_ANALYZED)); + return resume; + } + + // ... 
has multiple jobs + private Document makeJob(String skill, int year) { + Document job = new Document(); + job.add(newField("skill", skill, Field.Store.YES, Field.Index.NOT_ANALYZED)); + job.add(new NumericField("year").setIntValue(year)); + return job; + } + + public void testSimple() throws Exception { + + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random, dir); + + final List docs = new ArrayList(); + + docs.add(makeJob("java", 2007)); + docs.add(makeJob("python", 2010)); + docs.add(makeResume("Lisa", "United Kingdom")); + w.addDocuments(docs); + + docs.clear(); + docs.add(makeJob("ruby", 2005)); + docs.add(makeJob("java", 2006)); + docs.add(makeResume("Frank", "United States")); + w.addDocuments(docs); + + IndexReader r = w.getReader(); + w.close(); + IndexSearcher s = new IndexSearcher(r); + + // Create a filter that defines "parent" documents in the index - in this case resumes + Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))); + + // Define child document criteria (finds an example of relevant work experience) + BooleanQuery childQuery = new BooleanQuery(); + childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST)); + childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST)); + + // Define parent document criteria (find a resident in the UK) + Query parentQuery = new TermQuery(new Term("country", "United Kingdom")); + + // Wrap the child document query to 'join' any matches + // up to corresponding parent: + BlockJoinQuery childJoinQuery = new BlockJoinQuery(childQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg); + + // Combine the parent and nested child queries into a single query for a candidate + BooleanQuery fullQuery = new BooleanQuery(); + fullQuery.add(new BooleanClause(parentQuery, Occur.MUST)); + fullQuery.add(new BooleanClause(childJoinQuery, 
Occur.MUST)); + + BlockJoinCollector c = new BlockJoinCollector(Sort.RELEVANCE, 1, true, false); + + s.search(fullQuery, c); + + TopGroups results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true); + + assertEquals(1, results.totalHitCount); + assertEquals(1, results.totalGroupedHitCount); + assertEquals(1, results.groups.length); + + final GroupDocs group = results.groups[0]; + assertEquals(1, group.totalHits); + + Document childDoc = s.doc(group.scoreDocs[0].doc); + //System.out.println(" doc=" + group.scoreDocs[0].doc); + assertEquals("java", childDoc.get("skill")); + + Document parentDoc = s.doc(group.groupDoc); + assertEquals("Lisa", parentDoc.get("name")); + + r.close(); + dir.close(); + } + + private String[][] getRandomFields(int maxUniqueValues) { + + final String[][] fields = new String[_TestUtil.nextInt(random, 2, 4)][]; + for(int fieldID=0;fieldID 1 join'd tables too + final RandomIndexWriter w = new RandomIndexWriter(random, dir); + final RandomIndexWriter joinW = new RandomIndexWriter(random, joinDir); + for(int parentDocID=0;parentDocID joinDocs = new ArrayList(); + + final int numChildDocs = _TestUtil.nextInt(random, 1, 20); + for(int childDocID=0;childDocID