Index: lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java =================================================================== --- lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (revision 1477345) +++ lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (working copy) @@ -1206,4 +1206,191 @@ r.close(); dir.close(); } + + // LUCENE-4968 + public void testSometimesParentOnlyMatches() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + Document parent = new Document(); + parent.add(new StoredField("parentID", "0")); + parent.add(newTextField("parentText", "text", Field.Store.NO)); + parent.add(newStringField("isParent", "yes", Field.Store.NO)); + + List docs = new ArrayList(); + + Document child = new Document(); + docs.add(child); + child.add(new StoredField("childID", "0")); + child.add(newTextField("childText", "text", Field.Store.NO)); + + // parent last: + docs.add(parent); + w.addDocuments(docs); + + docs.clear(); + + parent = new Document(); + parent.add(newTextField("parentText", "text", Field.Store.NO)); + parent.add(newStringField("isParent", "yes", Field.Store.NO)); + parent.add(new StoredField("parentID", "1")); + + // parent last: + docs.add(parent); + w.addDocuments(docs); + + IndexReader r = w.getReader(); + w.close(); + + Query childQuery = new TermQuery(new Term("childText", "text")); + Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes")))); + ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); + BooleanQuery parentQuery = new BooleanQuery(); + parentQuery.add(childJoinQuery, Occur.SHOULD); + parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD); + + ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)), + 10, true, true); + 
newSearcher(r).search(parentQuery, c); + TopGroups groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false); + + // Two parents: + assertEquals(2, groups.totalGroupCount.intValue()); + + // One child doc: + assertEquals(1, groups.totalGroupedHitCount); + + GroupDocs group = groups.groups[0]; + Document doc = r.document(group.groupValue.intValue()); + assertEquals("0", doc.get("parentID")); + System.out.println("group: " + group); + + group = groups.groups[1]; + doc = r.document(group.groupValue.intValue()); + assertEquals("1", doc.get("parentID")); + + r.close(); + d.close(); + } + + // LUCENE-4968 + public void testChildQueryNeverMatches() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + Document parent = new Document(); + parent.add(new StoredField("parentID", "0")); + parent.add(newTextField("parentText", "text", Field.Store.NO)); + parent.add(newStringField("isParent", "yes", Field.Store.NO)); + + List docs = new ArrayList(); + + Document child = new Document(); + docs.add(child); + child.add(new StoredField("childID", "0")); + child.add(newTextField("childText", "text", Field.Store.NO)); + + // parent last: + docs.add(parent); + w.addDocuments(docs); + + docs.clear(); + + parent = new Document(); + parent.add(newTextField("parentText", "text", Field.Store.NO)); + parent.add(newStringField("isParent", "yes", Field.Store.NO)); + parent.add(new StoredField("parentID", "1")); + + // parent last: + docs.add(parent); + w.addDocuments(docs); + + IndexReader r = w.getReader(); + w.close(); + + // never matches: + Query childQuery = new TermQuery(new Term("childText", "bogus")); + Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes")))); + ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); + BooleanQuery parentQuery = new BooleanQuery(); + parentQuery.add(childJoinQuery, 
Occur.SHOULD); + parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD); + + ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)), + 10, true, true); + newSearcher(r).search(parentQuery, c); + TopGroups groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false); + + // Two parents: + assertEquals(2, groups.totalGroupCount.intValue()); + + // No child docs: + assertEquals(0, groups.totalGroupedHitCount); + + GroupDocs group = groups.groups[0]; + Document doc = r.document(group.groupValue.intValue()); + assertEquals("0", doc.get("parentID")); + System.out.println("group: " + group); + + group = groups.groups[1]; + doc = r.document(group.groupValue.intValue()); + assertEquals("1", doc.get("parentID")); + + r.close(); + d.close(); + } + + // LUCENE-4968 + public void testChildQueryMatchesParent() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + Document parent = new Document(); + parent.add(new StoredField("parentID", "0")); + parent.add(newTextField("parentText", "text", Field.Store.NO)); + parent.add(newStringField("isParent", "yes", Field.Store.NO)); + + List docs = new ArrayList(); + + Document child = new Document(); + docs.add(child); + child.add(new StoredField("childID", "0")); + child.add(newTextField("childText", "text", Field.Store.NO)); + + // parent last: + docs.add(parent); + w.addDocuments(docs); + + docs.clear(); + + parent = new Document(); + parent.add(newTextField("parentText", "text", Field.Store.NO)); + parent.add(newStringField("isParent", "yes", Field.Store.NO)); + parent.add(new StoredField("parentID", "1")); + + // parent last: + docs.add(parent); + w.addDocuments(docs); + + IndexReader r = w.getReader(); + w.close(); + + // illegally matches parent: + Query childQuery = new TermQuery(new Term("parentText", "text")); + Filter parentsFilter = new CachingWrapperFilter(new 
QueryWrapperFilter(new TermQuery(new Term("isParent", "yes")))); + ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); + BooleanQuery parentQuery = new BooleanQuery(); + parentQuery.add(childJoinQuery, Occur.SHOULD); + parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD); + + ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)), + 10, true, true); + + try { + newSearcher(r).search(parentQuery, c); + fail("should have hit exception"); + } catch (IllegalStateException ise) { + // expected + } + + r.close(); + d.close(); + } } Index: lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java =================================================================== --- lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (revision 1477345) +++ lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (working copy) @@ -195,10 +195,8 @@ @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs()); - if (scorer != null) { - if (scorer.advance(doc) == doc) { - return scorer.explain(context.docBase); - } + if (scorer != null && scorer.advance(doc) == doc) { + return scorer.explain(context.docBase); } return new ComplexExplanation(false, 0.0f, "Not a match"); } @@ -246,6 +244,10 @@ return childDocUpto; } + int getParentDoc() { + return parentDoc; + } + int[] swapChildDocs(int[] other) { final int[] ret = pendingChildDocs; if (other == null) { @@ -272,7 +274,6 @@ @Override public int nextDoc() throws IOException { //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc); - // Loop until we hit a parentDoc that's accepted while (true) { if (nextChildDoc == NO_MORE_DOCS) { @@ -285,6 +286,12 @@ parentDoc = 
parentBits.nextSetBit(nextChildDoc); + // Parent & child docs are supposed to be + // orthogonal: + if (nextChildDoc == parentDoc) { + throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass()); + } + //System.out.println(" parentDoc=" + parentDoc); assert parentDoc != -1; @@ -295,6 +302,13 @@ do { nextChildDoc = childScorer.nextDoc(); } while (nextChildDoc < parentDoc); + + // Parent & child docs are supposed to be + // orthogonal: + if (nextChildDoc == parentDoc) { + throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass()); + } + continue; } @@ -326,8 +340,11 @@ nextChildDoc = childScorer.nextDoc(); } while (nextChildDoc < parentDoc); - // Parent & child docs are supposed to be orthogonal: - assert nextChildDoc != parentDoc; + // Parent & child docs are supposed to be + // orthogonal: + if (nextChildDoc == parentDoc) { + throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass()); + } switch(scoreMode) { case Avg: @@ -343,7 +360,7 @@ break; } - //System.out.println(" return parentDoc=" + parentDoc); + //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto); return parentDoc; } } @@ -393,7 +410,9 @@ } // Parent & child docs are supposed to be orthogonal: - assert nextChildDoc != prevParentDoc; + if (nextChildDoc == prevParentDoc) { + throw new IllegalStateException("child query must only match non-parent docs, but parent docID=" + nextChildDoc + " matched childScorer=" + childScorer.getClass()); + } final int nd = nextDoc(); //System.out.println(" return nextParentDoc=" + nd); Index: lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java 
=================================================================== --- lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (revision 1477345) +++ lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (working copy) @@ -111,6 +111,7 @@ if (trackMaxScore) { maxScore = Float.MIN_VALUE; } + //System.out.println("numParentHits=" + numParentHits); this.trackScores = trackScores; this.numParentHits = numParentHits; queue = FieldValueHitQueue.create(sort.getSort(), numParentHits); @@ -122,6 +123,7 @@ private static final class OneGroup extends FieldValueHitQueue.Entry { public OneGroup(int comparatorSlot, int parentDoc, float parentScore, int numJoins, boolean doScores) { super(comparatorSlot, parentDoc, parentScore); + //System.out.println("make OneGroup parentDoc=" + parentDoc); docs = new int[numJoins][]; for(int joinID=0;joinID= og.counts[scorerIDX]: "length=" + og.scores[scorerIDX].length + " vs count=" + og.counts[scorerIDX]; } + } else { + og.counts[scorerIDX] = 0; } } } @@ -302,13 +309,16 @@ Arrays.fill(joinScorers, null); Queue queue = new LinkedList(); + //System.out.println("\nqueue: add top scorer=" + scorer); queue.add(scorer); while ((scorer = queue.poll()) != null) { + //System.out.println(" poll: " + scorer + "; " + scorer.getWeight().getQuery()); if (scorer instanceof ToParentBlockJoinQuery.BlockJoinScorer) { enroll((ToParentBlockJoinQuery) scorer.getWeight().getQuery(), (ToParentBlockJoinQuery.BlockJoinScorer) scorer); } for (ChildScorer sub : scorer.getChildren()) { + //System.out.println(" add sub: " + sub.child + "; " + sub.child.getWeight().getQuery()); queue.add(sub.child); } } @@ -384,12 +394,8 @@ throws IOException { final Integer _slot = joinQueryID.get(query); - if (_slot == null) { - if (totalHitCount == 0) { - return null; - } else { - throw new IllegalArgumentException("the Query did not contain the provided BlockJoinQuery"); - } + if (_slot == null && totalHitCount == 0) { + return 
null; } if (sortedGroups == null) { @@ -401,7 +407,7 @@ return null; } - return accumulateGroups(_slot, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields); + return accumulateGroups(_slot == null ? -1 : _slot.intValue(), offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields); } /** @@ -423,18 +429,26 @@ final FakeScorer fakeScorer = new FakeScorer(); int totalGroupedHitCount = 0; + //System.out.println("slot=" + slot); for(int groupIDX=offset;groupIDX= og.counts.length) { + numChildDocs = 0; + } else { + numChildDocs = og.counts[slot]; + } // Number of documents in group should be bounded to prevent redundant memory allocation - final int numDocsInGroup = Math.min(numChildDocs, maxDocsPerGroup); + final int numDocsInGroup = Math.max(1, Math.min(numChildDocs, maxDocsPerGroup)); + //System.out.println("parent doc=" + og.doc + " numChildDocs=" + numChildDocs + " maxDocsPG=" + maxDocsPerGroup); // At this point we hold all docs w/ in each group, // unsorted; we now sort them: final TopDocsCollector collector; if (withinGroupSort == null) { + //System.out.println("sort by score"); // Sort by score if (!trackScores) { throw new IllegalArgumentException("cannot sort by relevance within group: trackScores=false"); @@ -448,6 +462,7 @@ collector.setScorer(fakeScorer); collector.setNextReader(og.readerContext); for(int docIDX=0;docIDX