Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java
--- lucene/src/java/org/apache/lucene/search/FieldComparator.java Sat Jun 04 12:14:05 2011 -0400
+++ lucene/src/java/org/apache/lucene/search/FieldComparator.java Sat Jun 04 12:18:46 2011 -0400
@@ -657,8 +657,13 @@
@Override
public void setScorer(Scorer scorer) {
// wrap with a ScoreCachingWrappingScorer so that successive calls to
- // score() will not incur score computation over and over again.
- this.scorer = new ScoreCachingWrappingScorer(scorer);
+ // score() will not incur score computation over and
+ // over again.
+ if (!(scorer instanceof ScoreCachingWrappingScorer)) {
+ this.scorer = new ScoreCachingWrappingScorer(scorer);
+ } else {
+ this.scorer = scorer;
+ }
}
@Override
Index: lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java
--- lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java Sat Jun 04 12:14:05 2011 -0400
+++ lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java Sat Jun 04 12:18:46 2011 -0400
@@ -31,12 +31,12 @@
* @see IndexSearcher#search(Query,Filter,int,Sort)
* @see FieldCache
*/
-public abstract class FieldValueHitQueue extends PriorityQueue<FieldValueHitQueue.Entry> {
+public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends PriorityQueue<T> {
- final static class Entry extends ScoreDoc {
- int slot;
+ public static class Entry extends ScoreDoc {
+ public int slot;
- Entry(int slot, int doc, float score) {
+ public Entry(int slot, int doc, float score) {
super(doc, score);
this.slot = slot;
}
@@ -51,7 +51,7 @@
* An implementation of {@link FieldValueHitQueue} which is optimized in case
* there is just one comparator.
*/
- private static final class OneComparatorFieldValueHitQueue extends FieldValueHitQueue {
+ private static final class OneComparatorFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {
private final int oneReverseMul;
public OneComparatorFieldValueHitQueue(SortField[] fields, int size)
@@ -92,7 +92,7 @@
* An implementation of {@link FieldValueHitQueue} which is optimized in case
* there is more than one comparator.
*/
- private static final class MultiComparatorsFieldValueHitQueue extends FieldValueHitQueue {
+ private static final class MultiComparatorsFieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends FieldValueHitQueue<T> {
public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size)
throws IOException {
@@ -156,24 +156,28 @@
* The number of hits to retain. Must be greater than zero.
* @throws IOException
*/
- public static FieldValueHitQueue create(SortField[] fields, int size) throws IOException {
+ public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size) throws IOException {
if (fields.length == 0) {
throw new IllegalArgumentException("Sort must contain at least one field");
}
if (fields.length == 1) {
- return new OneComparatorFieldValueHitQueue(fields, size);
+ return new OneComparatorFieldValueHitQueue<T>(fields, size);
} else {
- return new MultiComparatorsFieldValueHitQueue(fields, size);
+ return new MultiComparatorsFieldValueHitQueue<T>(fields, size);
}
}
- FieldComparator[] getComparators() { return comparators; }
+ public FieldComparator[] getComparators() {
+ return comparators;
+ }
- int[] getReverseMul() { return reverseMul; }
+ public int[] getReverseMul() {
+ return reverseMul;
+ }
- protected void setComparator(int pos, FieldComparator comparator) {
+ public void setComparator(int pos, FieldComparator comparator) {
if (pos==0) firstComparator = comparator;
comparators[pos] = comparator;
}
Index: lucene/src/java/org/apache/lucene/search/Scorer.java
--- lucene/src/java/org/apache/lucene/search/Scorer.java Sat Jun 04 12:14:05 2011 -0400
+++ lucene/src/java/org/apache/lucene/search/Scorer.java Sat Jun 04 12:18:46 2011 -0400
@@ -162,7 +162,7 @@
*
* @lucene.experimental
*/
- protected void visitSubScorers(Query parent, Occur relationship,
+ public void visitSubScorers(Query parent, Occur relationship,
ScorerVisitor<Query, Query, Scorer> visitor) {
if (weight == null)
throw new UnsupportedOperationException();
Index: lucene/src/java/org/apache/lucene/util/ArrayUtil.java
--- lucene/src/java/org/apache/lucene/util/ArrayUtil.java Sat Jun 04 12:14:05 2011 -0400
+++ lucene/src/java/org/apache/lucene/util/ArrayUtil.java Sat Jun 04 12:18:46 2011 -0400
@@ -380,6 +380,56 @@
return array;
}
+ public static int[][] grow(int[][] array, int minSize) {
+ if (array.length < minSize) {
+ int[][] newArray = new int[oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
+ System.arraycopy(array, 0, newArray, 0, array.length);
+ return newArray;
+ } else {
+ return array;
+ }
+ }
+
+ public static int[][] grow(int[][] array) {
+ return grow(array, 1 + array.length);
+ }
+
+ public static int[][] shrink(int[][] array, int targetSize) {
+ final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ if (newSize != array.length) {
+ int[][] newArray = new int[newSize][];
+ System.arraycopy(array, 0, newArray, 0, newSize);
+ return newArray;
+ } else {
+ return array;
+ }
+ }
+
+ public static float[][] grow(float[][] array, int minSize) {
+ if (array.length < minSize) {
+ float[][] newArray = new float[oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
+ System.arraycopy(array, 0, newArray, 0, array.length);
+ return newArray;
+ } else {
+ return array;
+ }
+ }
+
+ public static float[][] grow(float[][] array) {
+ return grow(array, 1 + array.length);
+ }
+
+ public static float[][] shrink(float[][] array, int targetSize) {
+ final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ if (newSize != array.length) {
+ float[][] newArray = new float[newSize][];
+ System.arraycopy(array, 0, newArray, 0, newSize);
+ return newArray;
+ } else {
+ return array;
+ }
+ }
+
/**
* Returns hash of chars in range start (inclusive) to
* end (inclusive)
@@ -604,6 +654,7 @@
*/
public static <T> void mergeSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
if (toIndex-fromIndex <= 1) return;
+ //System.out.println("SORT: " + (toIndex-fromIndex));
getSorter(a, comp).mergeSort(fromIndex, toIndex-1);
}
Index: lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
--- lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Sat Jun 04 12:14:05 2011 -0400
+++ lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Sat Jun 04 12:18:46 2011 -0400
@@ -35,6 +35,7 @@
import java.util.zip.ZipFile;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler;
@@ -420,4 +421,25 @@
newName.append(suffix);
return new File(directory, newName.toString());
}
+
+ // NOTE: this is likely buggy, and cannot clone fields
+ // with tokenStreamValues, etc. Use at your own risk!!
+
+ // TODO: is there a pre-existing way to do this!!!
+ public static Document cloneDocument(Document doc1) {
+ final Document doc2 = new Document();
+ for(Fieldable f : doc1.getFields()) {
+ Field field1 = (Field) f;
+
+ Field field2 = new Field(field1.name(),
+ field1.stringValue(),
+ field1.isStored() ? Field.Store.YES : Field.Store.NO,
+ field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
+ field2.setOmitNorms(field1.getOmitNorms());
+ field2.setOmitTermFreqAndPositions(field1.getOmitTermFreqAndPositions());
+ doc2.add(field2);
+ }
+
+ return doc2;
+ }
}
Index: lucene/src/test/org/apache/lucene/index/TestNRTThreads.java
--- lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Sat Jun 04 12:14:05 2011 -0400
+++ lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Sat Jun 04 12:18:46 2011 -0400
@@ -70,28 +70,6 @@
}
}
- // TODO: is there a pre-existing way to do this!!!
- private Document cloneDoc(Document doc1) {
- final Document doc2 = new Document();
- for(Fieldable f : doc1.getFields()) {
- Field field1 = (Field) f;
-
- Field field2 = new Field(field1.name(),
- field1.stringValue(),
- field1.isStored() ? Field.Store.YES : Field.Store.NO,
- field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
- if (field1.getOmitNorms()) {
- field2.setOmitNorms(true);
- }
- if (field1.getOmitTermFreqAndPositions()) {
- field2.setOmitTermFreqAndPositions(true);
- }
- doc2.add(field2);
- }
-
- return doc2;
- }
-
@Test
public void testNRTThreads() throws Exception {
@@ -217,7 +195,7 @@
allSubDocs.add(subDocs);
doc.add(packIDField);
- docsList.add(cloneDoc(doc));
+ docsList.add(_TestUtil.cloneDocument(doc));
docIDs.add(doc.get("docid"));
final int maxDocCount = _TestUtil.nextInt(random, 1, 10);
@@ -226,7 +204,7 @@
if (doc == null) {
break;
}
- docsList.add(cloneDoc(doc));
+ docsList.add(_TestUtil.cloneDocument(doc));
docIDs.add(doc.get("docid"));
}
addCount.addAndGet(docsList.size());
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java
--- modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java Sat Jun 04 12:14:05 2011 -0400
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java Sat Jun 04 12:18:46 2011 -0400
@@ -38,16 +38,21 @@
/** Total hits within this group */
public final int totalHits;
+ // nocommit better name; or, maybe, make this the groupValue?
+ public final int groupDoc;
+
/** Matches the groupSort passed to {@link
* AbstractFirstPassGroupingCollector}. */
public final Comparable[] groupSortValues;
public GroupDocs(float maxScore,
int totalHits,
+ int groupDoc,
ScoreDoc[] scoreDocs,
GROUP_VALUE_TYPE groupValue,
Comparable[] groupSortValues) {
this.maxScore = maxScore;
+ this.groupDoc = groupDoc;
this.totalHits = totalHits;
this.scoreDocs = scoreDocs;
this.groupValue = groupValue;
Index: modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java Sat Jun 04 12:18:46 2011 -0400
@@ -0,0 +1,418 @@
+package org.apache.lucene.search.join;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.FieldValueHitQueue;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreCachingWrappingScorer;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.grouping.GroupDocs;
+import org.apache.lucene.search.grouping.TopGroups;
+import org.apache.lucene.util.ArrayUtil;
+
+
+// nocommit jdocs
+public class BlockJoinCollector extends Collector {
+
+ private final Sort sort;
+
+ // Maps each BlockJoinQuery instance to its "slot" in
+ // joinScorers and in OneGroup's cached doc/scores/count:
+ private final Map<BlockJoinQuery,Integer> joinQueryID = new HashMap<BlockJoinQuery,Integer>();
+ private final int numParentHits;
+ private final FieldValueHitQueue<OneGroup> queue;
+ private final FieldComparator[] comparators;
+ private final int[] reverseMul;
+ private final int compEnd;
+ private final boolean trackMaxScore;
+ private final boolean trackScores;
+
+ private int docBase;
+ private BlockJoinQuery.BlockJoinScorer[] joinScorers = new BlockJoinQuery.BlockJoinScorer[0];
+ private IndexReader.AtomicReaderContext currentReaderContext;
+ private Scorer scorer;
+ private boolean queueFull;
+
+ private OneGroup bottom;
+ private int totalHitCount;
+ private float maxScore = Float.NaN;
+
+ // nocommit: accept null sort to mean "by relevance"
+
+ // nocommit: should we require caller to provide number of
+ // BlockJoinQueries up front...?
+ public BlockJoinCollector(Sort sort, int numParentHits, boolean trackScores, boolean trackMaxScore) throws IOException {
+ this.sort = sort;
+ this.trackMaxScore = trackMaxScore;
+ this.trackScores = trackScores;
+ this.numParentHits = numParentHits;
+ queue = FieldValueHitQueue.create(sort.getSort(), numParentHits);
+ comparators = queue.getComparators();
+ reverseMul = queue.getReverseMul();
+ compEnd = comparators.length - 1;
+ }
+
+ private static final class OneGroup extends FieldValueHitQueue.Entry {
+ public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) {
+ super(comparatorSlot, parentDoc, parentScore);
+ docs = new int[numJoins][];
+ for(int joinID=0;joinID 0) {
+ // Definitely competitive.
+ break;
+ } else if (i == compEnd) {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ //System.out.println(" skip");
+ return;
+ }
+ }
+
+ //System.out.println(" competes!");
+
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ for (int i = 0; i < comparators.length; i++) {
+ comparators[i].copy(bottom.slot, parentDoc);
+ }
+ if (!trackMaxScore && trackScores) {
+ score = scorer.score();
+ }
+ bottom.doc = docBase + parentDoc;
+ bottom.score = score;
+ bottom = queue.updateTop();
+ copyGroups(bottom);
+
+ for (int i = 0; i < comparators.length; i++) {
+ comparators[i].setBottom(bottom.slot);
+ }
+ } else {
+ // Startup transient: queue is not yet full:
+ final int comparatorSlot = totalHitCount - 1;
+
+ // Copy hit into queue
+ for (int i = 0; i < comparators.length; i++) {
+ comparators[i].copy(comparatorSlot, parentDoc);
+ }
+ //System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
+ final OneGroup og = new OneGroup(comparatorSlot, docBase+parentDoc, score, joinScorers.length, trackScores);
+ og.readerContext = currentReaderContext;
+ copyGroups(og);
+ bottom = queue.add(og);
+ queueFull = totalHitCount == numParentHits;
+ if (queueFull) {
+ // End of startup transient: queue just filled up:
+ for (int i = 0; i < comparators.length; i++) {
+ comparators[i].setBottom(bottom.slot);
+ }
+ }
+ }
+ }
+
+ // Pulls out child doc and scores for all join queries:
+ private void copyGroups(OneGroup og) {
+ // While rare, it's possible top arrays could be too
+ // short if join query had null scorer on first
+ // segment(s) but then became non-null on later segments
+ final int numSubScorers = joinScorers.length;
+ if (og.docs.length < numSubScorers) {
+ // While rare, this could happen if join query had
+ // null scorer on first segment(s) but then became
+ // non-null on later segments
+ og.docs = ArrayUtil.grow(og.docs);
+ }
+ if (og.counts.length < numSubScorers) {
+ og.counts = ArrayUtil.grow(og.counts);
+ }
+ if (trackScores && og.scores.length < numSubScorers) {
+ og.scores = ArrayUtil.grow(og.scores);
+ }
+
+ //System.out.println("copyGroups");
+ for(int scorerIDX = 0;scorerIDX < numSubScorers;scorerIDX++) {
+ final BlockJoinQuery.BlockJoinScorer joinScorer = joinScorers[scorerIDX];
+ //System.out.println(" scorer=" + joinScorer);
+ if (joinScorer != null) {
+ og.counts[scorerIDX] = joinScorer.getChildCount();
+ //System.out.println(" count=" + og.counts[scorerIDX]);
+ og.docs[scorerIDX] = joinScorer.swapChildDocs(og.docs[scorerIDX]);
+ /*
+ for(int idx=0;idx() {
+ private void enroll(BlockJoinQuery query, BlockJoinQuery.BlockJoinScorer scorer) {
+ final Integer slot = joinQueryID.get(query);
+ if (slot == null) {
+ joinQueryID.put(query, joinScorers.length);
+ //System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
+ final BlockJoinQuery.BlockJoinScorer[] newArray = new BlockJoinQuery.BlockJoinScorer[1+joinScorers.length];
+ System.arraycopy(joinScorers, 0, newArray, 0, joinScorers.length);
+ joinScorers = newArray;
+ joinScorers[joinScorers.length-1] = scorer;
+ } else {
+ joinScorers[slot] = scorer;
+ }
+ }
+
+ @Override
+ public void visitOptional(Query parent, Query child, Scorer scorer) {
+ //System.out.println("visitOpt");
+ if (child instanceof BlockJoinQuery) {
+ enroll((BlockJoinQuery) child,
+ (BlockJoinQuery.BlockJoinScorer) scorer);
+ }
+ }
+
+ @Override
+ public void visitRequired(Query parent, Query child, Scorer scorer) {
+ //System.out.println("visitReq parent=" + parent + " child=" + child + " scorer=" + scorer);
+ if (child instanceof BlockJoinQuery) {
+ enroll((BlockJoinQuery) child,
+ (BlockJoinQuery.BlockJoinScorer) scorer);
+ }
+ }
+
+ @Override
+ public void visitProhibited(Query parent, Query child, Scorer scorer) {
+ //System.out.println("visitProh");
+ if (child instanceof BlockJoinQuery) {
+ enroll((BlockJoinQuery) child,
+ (BlockJoinQuery.BlockJoinScorer) scorer);
+ }
+ }
+ });
+ }
+
+ private final static class FakeScorer extends Scorer {
+
+ float score;
+ int doc;
+
+ public FakeScorer() {
+ super((Weight) null);
+ }
+
+ @Override
+ public float score() {
+ return score;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int advance(int target) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int nextDoc() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private OneGroup[] sortedGroups;
+
+ private void sortQueue() {
+ sortedGroups = new OneGroup[queue.size()];
+ for(int downTo=queue.size()-1;downTo>=0;downTo--) {
+ sortedGroups[downTo] = queue.pop();
+ }
+ }
+
+ // nocommit jdocs
+ public TopGroups getTopGroups(BlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, boolean fillSortFields)
+
+ throws IOException {
+
+ final Integer _slot = joinQueryID.get(query);
+ if (_slot == null) {
+ throw new IllegalArgumentException("the provided BlockJoinQuery was not involved in the search");
+ }
+
+ // unbox once
+ final int slot = _slot;
+
+ if (offset >= queue.size()) {
+ return null;
+ }
+ int totalGroupedHitCount = 0;
+
+ if (sortedGroups == null) {
+ sortQueue();
+ }
+
+ final FakeScorer fakeScorer = new FakeScorer();
+
+ final GroupDocs[] groups = new GroupDocs[sortedGroups.length - offset];
+
+ for(int groupIDX=offset;groupIDXOnce the block index is built, use this query to wrap
+ * any sub-Query against child docs and join matches in that
+ * sub query up to the docID space of the parent
+ * documents. You can then use this Query as a clause with
+ * other queries in the parent docID space.
+ *
+ * The child documents must be orthogonal to the parent
+ * documents. That is, the wrapped child query must never
+ * return a parent docID.
+ *
+ * See {@link org.apache.lucene.search.join} for a full
+ * code sample.
+ */
+
+// nocommit code sample
+
+public class BlockJoinQuery extends Query {
+
+ public static enum ScoreMode {None, Avg, Max, Total};
+
+ // nocommit -- pass to child instances to avoid access
+ private final Filter parentsFilter;
+ private final Query childQuery;
+ private final ScoreMode scoreMode;
+
+ // nocommit -- can we require this is reversed (by docID) parent filter...?
+ public BlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode) {
+ super();
+ this.childQuery = childQuery;
+ this.parentsFilter = parentsFilter;
+ this.scoreMode = scoreMode;
+ }
+
+ public Weight createWeight(IndexSearcher searcher) throws IOException {
+ return new BlockJoinWeight(this, childQuery.weight(searcher), parentsFilter, scoreMode);
+ }
+
+ private static class BlockJoinWeight extends Weight {
+ private final Query joinQuery;
+ private final Weight childWeight;
+ private final Filter parentsFilter;
+ private final ScoreMode scoreMode;
+
+ public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode) {
+ super();
+ this.joinQuery = joinQuery;
+ this.childWeight = childWeight;
+ this.parentsFilter = parentsFilter;
+ this.scoreMode = scoreMode;
+ }
+
+ @Override
+ public Query getQuery() {
+ return joinQuery;
+ }
+
+ @Override
+ public float getValue() {
+ return childWeight.getValue();
+ }
+
+ @Override
+ public float sumOfSquaredWeights() throws IOException {
+ return childWeight.sumOfSquaredWeights();
+ }
+
+ @Override
+ public void normalize(float norm) {
+ childWeight.normalize(norm);
+ }
+
+ @Override
+ public Scorer scorer(AtomicReaderContext readerContext, ScorerContext context) throws IOException {
+ // Pass scoreDocsInOrder true, topScorer false to our sub:
+ final Scorer childScorer = childWeight.scorer(readerContext, ScorerContext.def().scoreDocsInOrder(true).topScorer(false));
+
+ if (childScorer == null) {
+ // No matches
+ return null;
+ }
+
+ final int firstChildDoc = childScorer.nextDoc();
+ if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
+ // No matches
+ return null;
+ }
+
+ final DocIdSet parents = parentsFilter.getDocIdSet(readerContext);
+ // TODO: once we do random-access filters we can
+ // generalize this:
+ if (parents == null) {
+ // No matches
+ return null;
+ }
+ if (!(parents instanceof OpenBitSet)) {
+ throw new IllegalStateException("parentFilter must return OpenBitSet; got " + parents);
+ }
+
+ return new BlockJoinScorer(this, childScorer, (OpenBitSet) parents, firstChildDoc, scoreMode);
+ }
+
+ @Override
+ public Explanation explain(AtomicReaderContext reader, int doc) throws IOException {
+ // TODO
+ throw new UnsupportedOperationException(getClass().getName() +
+ " cannot explain match on parent document");
+ }
+
+ @Override
+ public boolean scoresDocsOutOfOrder() {
+ return false;
+ }
+ }
+
+ static class BlockJoinScorer extends Scorer {
+ private final Scorer childScorer;
+ private final OpenBitSet parentBits;
+ private final ScoreMode scoreMode;
+ private int parentDoc;
+ private float parentScore;
+ private int nextChildDoc;
+ private int currentChild;
+ private float nextScore;
+
+ private int[] pendingChildDocs = new int[5];
+ private float[] pendingChildScores;
+ private int childDocUpto;
+
+ public BlockJoinScorer(Weight weight, Scorer childScorer, OpenBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
+ super(weight);
+ // nocommit -- maybe pull DISI?
+ //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ this.parentBits = parentBits;
+ this.childScorer = childScorer;
+ this.scoreMode = scoreMode;
+ if (scoreMode != ScoreMode.None) {
+ pendingChildScores = new float[5];
+ }
+ nextChildDoc = firstChildDoc;
+ }
+
+ @Override
+ public void visitSubScorers(Query parent, BooleanClause.Occur relationship,
+ ScorerVisitor<Query, Query, Scorer> visitor) {
+ super.visitSubScorers(parent, relationship, visitor);
+ childScorer.visitSubScorers(weight.getQuery(), BooleanClause.Occur.MUST, visitor);
+ }
+
+ int getChildCount() {
+ return childDocUpto;
+ }
+
+ int[] swapChildDocs(int[] other) {
+ final int[] ret = pendingChildDocs;
+ if (other == null) {
+ pendingChildDocs = new int[5];
+ } else {
+ pendingChildDocs = other;
+ }
+ return ret;
+ }
+
+ float[] swapChildScores(float[] other) {
+ if (scoreMode == ScoreMode.None) {
+ throw new IllegalStateException("ScoreMode is None");
+ }
+ final float[] ret = pendingChildScores;
+ if (other == null) {
+ pendingChildScores = new float[5];
+ } else {
+ pendingChildScores = other;
+ }
+ return ret;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ //System.out.println("Q.nextDoc()");
+
+ if (nextChildDoc == NO_MORE_DOCS) {
+ //System.out.println(" end");
+ return parentDoc = NO_MORE_DOCS;
+ }
+
+ // Gather all children sharing the same parent as nextChildDoc
+ parentDoc = parentBits.nextSetBit(nextChildDoc);
+ //System.out.println(" parentDoc=" + parentDoc);
+ assert parentDoc != -1;
+
+ int numSiblingsOrParents = 1;
+ float totalScore = 0;
+ float maxScore = Float.NEGATIVE_INFINITY;
+
+ childDocUpto = 0;
+ do {
+ //System.out.println(" c=" + nextChildDoc);
+ if (pendingChildDocs.length == childDocUpto) {
+ pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
+ if (scoreMode != ScoreMode.None) {
+ pendingChildScores = ArrayUtil.grow(pendingChildScores);
+ }
+ }
+ pendingChildDocs[childDocUpto] = nextChildDoc;
+ if (scoreMode != ScoreMode.None) {
+ // TODO: specialize this into dedicated classes per-scoreMode
+ final float childScore = childScorer.score();
+ pendingChildScores[childDocUpto] = childScore;
+ maxScore = Math.max(childScore, maxScore);
+ totalScore += childScore;
+ }
+ childDocUpto++;
+ nextChildDoc = childScorer.nextDoc();
+ } while (nextChildDoc < parentDoc);
+ //System.out.println(" nextChildDoc=" + nextChildDoc);
+
+ // Parent & child docs are supposed to be orthogonal:
+ assert nextChildDoc != parentDoc;
+
+ switch(scoreMode) {
+ case Avg:
+ parentScore = totalScore / childDocUpto;
+ break;
+ case Max:
+ parentScore = maxScore;
+ break;
+ case Total:
+ parentScore = totalScore;
+ break;
+ case None:
+ break;
+ }
+
+ //System.out.println(" return parentDoc=" + parentDoc);
+ return parentDoc;
+ }
+
+ @Override
+ public int docID() {
+ return parentDoc;
+ }
+
+ @Override
+ public float score() throws IOException {
+ return parentScore;
+ }
+
+ @Override
+ public int advance(int parentTarget) throws IOException {
+
+ //System.out.println("Q.advance parentTarget=" + parentTarget);
+ if (parentTarget == NO_MORE_DOCS) {
+ return parentDoc = NO_MORE_DOCS;
+ }
+
+ // TODO: need a prevSetBit in obs!
+ int prevParentDoc = parentTarget-1;
+ while(prevParentDoc > 0 && !parentBits.fastGet(prevParentDoc)) {
+ prevParentDoc--;
+ }
+ //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
+ assert prevParentDoc >= parentDoc;
+ if (prevParentDoc > nextChildDoc) {
+ nextChildDoc = childScorer.advance(prevParentDoc);
+ //System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
+ } else {
+ //System.out.println(" skip childScorer advance");
+ }
+
+ // Parent & child docs are supposed to be orthogonal:
+ assert nextChildDoc != prevParentDoc;
+
+ final int nd = nextDoc();
+ //System.out.println(" return nextParentDoc=" + nd);
+ return nd;
+ }
+ }
+
+ @Override
+ public void extractTerms(Set<Term> terms) {
+ childQuery.extractTerms(terms);
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ return this;
+ }
+
+ @Override
+ public String toString(String field) {
+ return "BlockJoinQuery ("+childQuery.toString()+")";
+ }
+}
Index: modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java Sat Jun 04 12:18:46 2011 -0400
@@ -0,0 +1,375 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.grouping.GroupDocs;
+import org.apache.lucene.search.grouping.TopGroups;
+import org.apache.lucene.search.join.BlockJoinCollector;
+import org.apache.lucene.search.join.BlockJoinQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestBlockJoin extends LuceneTestCase {
+
+ // One resume...
+ private Document makeResume(String name, String country) {
+ Document resume = new Document();
+ resume.add(newField("docType", "resume", Field.Index.NOT_ANALYZED));
+ resume.add(newField("name", name, Field.Store.YES, Field.Index.NOT_ANALYZED));
+ resume.add(newField("country", country, Field.Index.NOT_ANALYZED));
+ return resume;
+ }
+
+ // ... has multiple jobs
+ private Document makeJob(String skill, int year) {
+ Document job = new Document();
+ job.add(newField("skill", skill, Field.Store.YES, Field.Index.NOT_ANALYZED));
+ job.add(new NumericField("year").setIntValue(year));
+ return job;
+ }
+
+ public void testSimple() throws Exception {
+
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random, dir);
+
+ final List<Document> docs = new ArrayList<Document>();
+
+ docs.add(makeJob("java", 2007));
+ docs.add(makeJob("python", 2010));
+ docs.add(makeResume("Lisa", "United Kingdom"));
+ w.addDocuments(docs);
+
+ docs.clear();
+ docs.add(makeJob("ruby", 2005));
+ docs.add(makeJob("java", 2006));
+ docs.add(makeResume("Frank", "United States"));
+ w.addDocuments(docs);
+
+ IndexReader r = w.getReader();
+ w.close();
+ IndexSearcher s = new IndexSearcher(r);
+
+ // Create a filter that defines "parent" documents in the index - in this case resumes
+ Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ // Define child document criteria (finds an example of relevant work experience)
+ BooleanQuery childQuery = new BooleanQuery();
+ childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
+ childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));
+
+ // Define parent document criteria (find a resident in the UK)
+ Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+ // Wrap the child document query to 'join' any matches
+ // up to corresponding parent:
+ BlockJoinQuery childJoinQuery = new BlockJoinQuery(childQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg);
+
+ // Combine the parent and nested child queries into a single query for a candidate
+ BooleanQuery fullQuery = new BooleanQuery();
+ fullQuery.add(new BooleanClause(parentQuery, Occur.MUST));
+ fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
+
+ BlockJoinCollector c = new BlockJoinCollector(Sort.RELEVANCE, 1, true, false);
+
+ s.search(fullQuery, c);
+
+ TopGroups results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
+
+ assertEquals(1, results.totalHitCount);
+ assertEquals(1, results.totalGroupedHitCount);
+ assertEquals(1, results.groups.length);
+
+ final GroupDocs group = results.groups[0];
+ assertEquals(1, group.totalHits);
+
+ Document childDoc = s.doc(group.scoreDocs[0].doc);
+ //System.out.println(" doc=" + group.scoreDocs[0].doc);
+ assertEquals("java", childDoc.get("skill"));
+
+ Document parentDoc = s.doc(group.groupDoc);
+ assertEquals("Lisa", parentDoc.get("name"));
+
+ r.close();
+ dir.close();
+ }
+
+ private String[][] getRandomFields(int maxUniqueValues) {
+
+ final String[][] fields = new String[_TestUtil.nextInt(random, 2, 4)][];
+ for(int fieldID=0;fieldID 1 join'd tables too
+ final RandomIndexWriter w = new RandomIndexWriter(random, dir);
+ final RandomIndexWriter joinW = new RandomIndexWriter(random, joinDir);
+ for(int parentDocID=0;parentDocID joinDocs = new ArrayList();
+
+ final int numChildDocs = _TestUtil.nextInt(random, 1, 20);
+ for(int childDocID=0;childDocID