Index: java/engine/org/apache/derby/impl/sql/compile/Predicate.java =================================================================== --- java/engine/org/apache/derby/impl/sql/compile/Predicate.java (revision 421286) +++ java/engine/org/apache/derby/impl/sql/compile/Predicate.java (working copy) @@ -509,7 +509,8 @@ { // if any term of the OR clause is not a qualifier, then // reject the entire OR clause. - if (!((RelationalOperator) or_node.getLeftOperand()).isQualifier(optTable)) + if (!((RelationalOperator) or_node.getLeftOperand()). + isQualifier(optTable, true)) { // one of the terms is not a pushable Qualifier. return(false); @@ -852,10 +853,9 @@ * Determine whether or not this predicate is eligible for * push-down into subqueries. Right now the only predicates * we consider to be eligible are those which 1) are Binary - * Relational operator nodes, 2) have a column reference - * on BOTH sides, and 3) have column references such that - * each column reference has a reference to a base table - * somewhere beneath it. + * Relational operator nodes and 2) have a column reference + * on BOTH sides, each of which has a reference to a base + * table somewhere beneath it. * * @return Whether or not this predicate is eligible to be * pushed into subqueries. @@ -863,24 +863,9 @@ protected boolean pushableToSubqueries() throws StandardException { - // If the predicate isn't a binary relational operator, - // then we don't push it. - if (!(getAndNode().getLeftOperand() - instanceof BinaryRelationalOperatorNode)) - { + if (!isJoinPredicate()) return false; - } - BinaryRelationalOperatorNode opNode = - (BinaryRelationalOperatorNode)getAndNode().getLeftOperand(); - - // If either side is not a column reference, we don't push. - if (!((opNode.getLeftOperand() instanceof ColumnReference) && - (opNode.getRightOperand() instanceof ColumnReference))) - { - return false; - } - // Make sure both column references ultimately point to base // tables. If, for example, either column reference points to a // a literal or an aggregate, then we do not push the predicate. @@ -891,6 +876,9 @@ // such column references, but it's not clear whether that's // always a safe option; further investigation required. + BinaryRelationalOperatorNode opNode = + (BinaryRelationalOperatorNode)getAndNode().getLeftOperand(); + JBitSet tNums = new JBitSet(getReferencedSet().size()); BaseTableNumbersVisitor btnVis = new BaseTableNumbersVisitor(tNums); opNode.getLeftOperand().accept(btnVis); @@ -906,6 +894,31 @@ } /** + * Is this predicate a join predicate? In order to be so, + * it must be a binary relational operator node that has + * a column reference on both sides. + * + * @return Whether or not this is a join predicate. + */ + protected boolean isJoinPredicate() + { + // If the predicate isn't a binary relational operator, + // then it's not a join predicate. + if (!(getAndNode().getLeftOperand() + instanceof BinaryRelationalOperatorNode)) + { + return false; + } + + BinaryRelationalOperatorNode opNode = + (BinaryRelationalOperatorNode)getAndNode().getLeftOperand(); + + // If both sides are column references then this is a join pred. + return ((opNode.getLeftOperand() instanceof ColumnReference) && + (opNode.getRightOperand() instanceof ColumnReference)); + } + + /** * If this predicate's operator is a BinaryRelationalOperatorNode, * then look at the operands and return a new, equivalent predicate * that is "scoped" to the received ResultSetNode. By "scoped" we Index: java/engine/org/apache/derby/impl/sql/compile/ProjectRestrictNode.java =================================================================== --- java/engine/org/apache/derby/impl/sql/compile/ProjectRestrictNode.java (revision 421286) +++ java/engine/org/apache/derby/impl/sql/compile/ProjectRestrictNode.java (working copy) @@ -360,15 +360,38 @@ childCost.rowCount(), childCost.singleScanRowCount()); - getBestAccessPath().setCostEstimate(costEstimate); + /* Note: Prior to the fix for DERBY-781 we had calls here + * to set the cost estimate for BestAccessPath and + * BestSortAvoidancePath to equal costEstimate. That used + * to be okay because prior to DERBY-781 we would only + * get here once (per join order) for a given SelectNode/ + * RowResultSetNode and thus we could safely say that the + * costEstimate from the most recent call to "optimize()" + * was the best one so far (because we knew that we would + * only call childResult.optimize() once). Now that we + * support hash joins with subqueries, though, we can get + * here twice per join order: once when the optimizer is + * considering a nested loop join with this PRN, and once + * when it is considering a hash join. This means we can't + * just arbitrarily use the cost estimate for the most recent + * "optimize()" as the best cost because that may not + * be accurate--it's possible that the above call to + * childResult.optimize() was for a hash join, but that + * we were here once before (namely for nested loop) and + * the cost of the nested loop is actually less than + * the cost of the hash join. In that case it would + * be wrong to use costEstimate as the cost of the "best" + * paths because it (costEstimate) holds the cost of + * the hash join, not of the nested loop join. So with + * DERBY-781 the following calls were removed: + * getBestAccessPath().setCostEstimate(costEstimate); + * getBestSortAvoidancePath().setCostEstimate(costEstimate); + * If costEstimate *does* actually hold the estimate for + * the best path so far, then we will set BestAccessPath + * and BestSortAvoidancePath as needed in the following + * call to "considerCost". + */ - /* - ** The current access path may not be part of a sort avoidance - ** path, but set the cost estimate there anyway, just in case - ** it is. - */ - getBestSortAvoidancePath().setCostEstimate(costEstimate); - // childResultOptimized = true; /* RESOLVE - ARBITRARYHASHJOIN - Passing restriction list here, as above, is correct. @@ -666,8 +689,20 @@ } } + // If we're doing a hash join with _this_ PRN (as opposed to + // with this PRN's child) then we don't attempt to push + // predicates down. There are two reasons for this: 1) + // we don't want to push the equijoin predicate that is + // required for the hash join, and 2) if we're doing a + // hash join then we're going to materialize this node, + // but if we push predicates before materialization, we + // can end up with incorrect results (esp. missing rows). + // So don't push anything in this case. + boolean hashJoinWithThisPRN = hasTrulyTheBestAccessPath && + (trulyTheBestAccessPath.getJoinStrategy() != null) && + trulyTheBestAccessPath.getJoinStrategy().isHashJoin(); - if ((restrictionList != null) && !alreadyPushed) + if ((restrictionList != null) && !alreadyPushed && !hashJoinWithThisPRN) { restrictionList.pushUsefulPredicates((Optimizable) childResult); } @@ -717,6 +752,45 @@ private Optimizable replaceWithHashTableNode() throws StandardException { + // If this PRN has TTB access path for its child, store that access + // path in the child here, so that we can find it later when it + // comes time to generate qualifiers for the hash predicates (we + // need the child's access path when generating qualifiers; if we + // don't pass the path down here, the child won't be able to find + // it). + if (hasTrulyTheBestAccessPath) + { + ((FromTable)childResult).trulyTheBestAccessPath = + (AccessPathImpl)getTrulyTheBestAccessPath(); + + // If the child itself is another SingleChildResultSetNode + // (which is also what a ProjectRestrictNode is), then tell + // it that it is now holding TTB path for it's own child. Again, + // this info is needed so that child knows where to find the + // access path at generation time. + if (childResult instanceof SingleChildResultSetNode) + { + ((SingleChildResultSetNode)childResult) + .hasTrulyTheBestAccessPath = hasTrulyTheBestAccessPath; + + // While we're at it, add the PRN's table number to the + // child's referenced map so that we can find the equijoin + // predicate. We have to do this because the predicate + // will be referencing the PRN's tableNumber, not the + // child's--and since we use the child as the target + // when searching for hash keys (as can be seen in + // HashJoinStrategy.divideUpPredicateLists()), the child + // should know what this PRN's table number is. This + // is somewhat bizarre since the child doesn't + // actually "reference" this PRN, but since the child's + // reference map is used when searching for the equijoin + // predicate (see "buildTableNumList" in + // BinaryRelationalOperatorNode), this is the simplest + // way to pass this PRN's table number down. + childResult.getReferencedTableMap().set(tableNumber); + } + } + /* We want to divide the predicate list into 3 separate lists - * o predicates against the source of the hash table, which will * be applied on the way into the hash table (searchRestrictionList) @@ -844,25 +918,6 @@ } } - /** @see Optimizable#isMaterializable - * - * @exception StandardException Thrown on error - */ - public boolean isMaterializable() - throws StandardException - { - /* RESOLVE - Disallow arbitrary hash joins on - * SELECTS within a derived table for now. - * Remove this method once that restriction is removed. - */ - if (! (childResult instanceof Optimizable)) - { - return false; - } - - return super.isMaterializable(); - } - /** * @see Optimizable#uniqueJoin * Index: java/engine/org/apache/derby/impl/sql/compile/UnaryComparisonOperatorNode.java =================================================================== --- java/engine/org/apache/derby/impl/sql/compile/UnaryComparisonOperatorNode.java (revision 421286) +++ java/engine/org/apache/derby/impl/sql/compile/UnaryComparisonOperatorNode.java (working copy) @@ -166,17 +166,7 @@ } ft = (FromBaseTable) optTable; - - return getColumnOperand(ft.getTableNumber(), columnPosition); - } - - /** @see RelationalOperator#getColumnOperand */ - public ColumnReference getColumnOperand( - int tableNumber, - int columnPosition) - { ColumnReference cr; - if (operand instanceof ColumnReference) { /* @@ -184,7 +174,7 @@ ** Is it the correct column? */ cr = (ColumnReference) operand; - if (cr.getTableNumber() == tableNumber) + if (cr.getTableNumber() == ft.getTableNumber()) { /* The table is correct, how about the column position? */ if (cr.getSource().getColumnPosition() == columnPosition) @@ -222,6 +212,58 @@ return null; } + /** @see RelationalOperator#getOperand */ + public ValueNode getOperand(ColumnReference cRef, int refSetSize, + boolean otherSide) + { + if (otherSide) + // there is no "other" side for Unary, so just return null. + return null; + + ColumnReference cr; + if (operand instanceof ColumnReference) + { + /* + ** The operand is a column reference. + ** Is it the correct column? + */ + JBitSet cRefTables = new JBitSet(refSetSize); + JBitSet crTables = new JBitSet(refSetSize); + BaseTableNumbersVisitor btnVis = + new BaseTableNumbersVisitor(crTables); + + cr = (ColumnReference) operand; + try { + cr.accept(btnVis); + btnVis.setTableMap(cRefTables); + cRef.accept(btnVis); + } catch (StandardException se) { + if (SanityManager.DEBUG) + { + SanityManager.THROWASSERT("Failed when trying to " + + "find base table number for column reference check:\n" + + se.getMessage()); + } + } + crTables.and(cRefTables); + if (crTables.getFirstSetBit() != -1) + { + /* + ** The table is correct, how about the column position? + */ + if (cr.getSource().getColumnPosition() == + cRef.getColumnNumber()) + { + /* We've found the correct column - return it. */ + return operand; + } + } + } + + /* Not the column we're looking for */ + return null; + } + /** @see RelationalOperator#selfComparison */ public boolean selfComparison(ColumnReference cr) { @@ -242,7 +284,8 @@ * @see RelationalOperator#getExpressionOperand */ public ValueNode getExpressionOperand(int tableNumber, - int columnNumber) + int columnNumber, + FromTable ft) { return null; } @@ -384,7 +427,7 @@ } /** @see RelationalOperator#isQualifier */ - public boolean isQualifier(Optimizable optTable) + public boolean isQualifier(Optimizable optTable, boolean forPush) { /* ** It's a Qualifier if the operand is a ColumnReference referring Index: java/engine/org/apache/derby/impl/sql/compile/RelationalOperator.java =================================================================== --- java/engine/org/apache/derby/impl/sql/compile/RelationalOperator.java (revision 421286) +++ java/engine/org/apache/derby/impl/sql/compile/RelationalOperator.java (working copy) @@ -68,25 +68,6 @@ ColumnReference getColumnOperand(Optimizable optTable, int columnPosition); /** - * Check whether this RelationalOperator is a comparison of the given - * column with an expression. If so, return the ColumnReference that - * corresponds to the given column, and that is on one side of this - * RelationalOperator or the other (this method copes with the - * column being on either side of the operator). If the given column - * does not appear by itself on one side of the comparison, the - * method returns null. - * - * @param tableNumber The table number of the table in question - * @param columnPosition The ordinal position of the column (one-based) - * - * @return The ColumnReference on one side of this RelationalOperator - * that represents the given columnPosition. Returns null - * if no such ColumnReference exists by itself on one side of - * this RelationalOperator. - */ - ColumnReference getColumnOperand(int tableNumber, int columnPosition); - - /** * Get the ColumnReference for the given table on one side of this * RelationalOperator. This presumes it will be found only on one * side. If not found, it will return null. @@ -94,18 +75,42 @@ ColumnReference getColumnOperand(Optimizable optTable); /** + * Find the operand (left or right) that points to the same table + * as the received ColumnReference, and then return either that + * operand or the "other" operand, depending on the value of + * otherSide. This presumes it will be found only on one + * side. If not found, it will return null. + * + * @param cRef The ColumnReference for which we're searching. + * @param refSetSize Size of the referenced map for the predicate + * represented by this RelationalOperator node. This is used + * for storing base table numbers when searching for cRef. + * @param otherSide Assuming we find an operand that points to + * the same table as cRef, then we will return the *other* + * operand if otherSide is true; else we'll return the operand + * that matches cRef. + */ + ValueNode getOperand(ColumnReference cRef, int refSetSize, + boolean otherSide); + + /** * Check whether this RelationalOperator is a comparison of the given * column with an expression. If so, return the expression * the column is being compared to. * * @param tableNumber The table number of the base table the column is in * @param columnPosition The ordinal position of the column (one-based) + * @param ft We'll look for the column in all tables at and beneath ft. + * This is useful if ft is, say, a ProjectRestrictNode over a subquery-- + * then we want to look at all of the FROM tables in the subquery to try + * to find the right column. * * @return The ValueNode for the expression the column is being compared * to - null if the column is not being compared to anything. */ ValueNode getExpressionOperand(int tableNumber, - int columnPosition); + int columnPosition, + FromTable ft); /** * Check whether this RelationalOperator is a comparison of the given @@ -277,14 +282,40 @@ * from that table on one side of this relop, and an expression that * does not refer to the table on the other side of the relop. * + * Note that this method has two uses: 1) see if this operator (or + * more specifically, the predicate to which this operator belongs) + * can be used as a join predicate (esp. for a hash join), and 2) + * see if this operator can be pushed to the target optTable. We + * use the parameter "forPush" to distinguish between the two uses + * because in some cases (esp. situations where we have subqueries) + * the answer to "is this a qualifier?" can differ depending on + * whether or not we're pushing. In particular, for binary ops + * that are join predicates, if we're just trying to find an + * equijoin predicate then this op qualifies if it references either + * the target table OR any of the base tables in the table's subtree. + * But if we're planning to push the predicate down to the target + * table, this op only qualifies if it references the target table + * directly. This difference in behavior is required because in + * case 1 (searching for join predicates), the operator remains at + * its current level in the tree even if its operands reference + * nodes further down; in case 2, though, we'll end up pushing + * the operator down the tree to child node(s) and that requires + * additional logic, such as "scoping" consideration. Until + * that logic is in place, we don't search a subtree if the intent + * is to push the predicate to which this operator belongs further + * down that subtree. See BinaryRelationalOperatorNode for an + * example of where this comes into play. + * * @param optTable The Optimizable table in question. + * @param forPush Are we asking because we're trying to push? * * @return true if this operator can be compiled into a Qualifier * for the given Optimizable table. * * @exception StandardException Thrown on error */ - boolean isQualifier(Optimizable optTable) throws StandardException; + boolean isQualifier(Optimizable optTable, boolean forPush) + throws StandardException; /** * Return the operator (as an int) for this RelationalOperator. Index: java/engine/org/apache/derby/impl/sql/compile/BinaryRelationalOperatorNode.java =================================================================== --- java/engine/org/apache/derby/impl/sql/compile/BinaryRelationalOperatorNode.java (revision 421286) +++ java/engine/org/apache/derby/impl/sql/compile/BinaryRelationalOperatorNode.java (working copy) @@ -65,6 +65,15 @@ private int operatorType; /* RelationalOperator Interface */ + // Visitor for finding base tables beneath optimizables and column + // references. Created once and re-used thereafter. + private BaseTableNumbersVisitor btnVis; + + // Bit sets for holding base tables beneath optimizables and + // column references. Created once and re-used thereafter. + JBitSet optBaseTables; + JBitSet valNodeBaseTables; + public void init(Object leftOperand, Object rightOperand) { String methodName = ""; @@ -115,6 +124,7 @@ break; } super.init(leftOperand, rightOperand, operatorName, methodName); + btnVis = null; } /** @see RelationalOperator#getColumnOperand */ @@ -124,16 +134,13 @@ { FromTable ft = (FromTable) optTable; - return getColumnOperand(ft.getTableNumber(), columnPosition); - } + // When searching for a matching column operand, we search + // the entire subtree (if there is one) beneath optTable + // to see if we can find any FromTables that correspond to + // either of this op's column references. - /** @see RelationalOperator#getColumnOperand */ - public ColumnReference getColumnOperand( - int tableNumber, - int columnPosition) - { ColumnReference cr; - + boolean walkSubtree = true; if (leftOperand instanceof ColumnReference) { /* @@ -141,7 +148,7 @@ ** Is it the correct column? */ cr = (ColumnReference) leftOperand; - if (cr.getTableNumber() == tableNumber) + if (valNodeReferencesOptTable(cr, ft, false, walkSubtree)) { /* ** The table is correct, how about the column position? @@ -152,6 +159,7 @@ return cr; } } + walkSubtree = false; } if (rightOperand instanceof ColumnReference) @@ -161,7 +169,7 @@ ** Is it the correct column? */ cr = (ColumnReference) rightOperand; - if (cr.getTableNumber() == tableNumber) + if (valNodeReferencesOptTable(cr, ft, false, walkSubtree)) { /* ** The table is correct, how about the column position? @@ -183,6 +191,7 @@ { ColumnReference cr; + boolean walkSubtree = true; if (leftOperand instanceof ColumnReference) { /* @@ -190,13 +199,15 @@ ** Is it the correct column? */ cr = (ColumnReference) leftOperand; - if (cr.getTableNumber() == optTable.getTableNumber()) + if (valNodeReferencesOptTable( + cr, (FromTable)optTable, false, walkSubtree)) { /* ** The table is correct. */ return cr; } + walkSubtree = false; } if (rightOperand instanceof ColumnReference) @@ -206,7 +217,8 @@ ** Is it the correct column? */ cr = (ColumnReference) rightOperand; - if (cr.getTableNumber() == optTable.getTableNumber()) + if (valNodeReferencesOptTable(cr, + (FromTable)optTable, false, walkSubtree)) { /* ** The table is correct @@ -224,9 +236,11 @@ */ public ValueNode getExpressionOperand( int tableNumber, - int columnPosition) + int columnPosition, + FromTable ft) { ColumnReference cr; + boolean walkSubtree = true; if (leftOperand instanceof ColumnReference) { @@ -235,7 +249,7 @@ ** Is it the correct column? */ cr = (ColumnReference) leftOperand; - if (cr.getTableNumber() == tableNumber) + if (valNodeReferencesOptTable(cr, ft, false, walkSubtree)) { /* ** The table is correct, how about the column position? @@ -249,6 +263,7 @@ return rightOperand; } } + walkSubtree = false; } if (rightOperand instanceof ColumnReference) @@ -258,7 +273,7 @@ ** Is it the correct column? */ cr = (ColumnReference) rightOperand; - if (cr.getTableNumber() == tableNumber) + if (valNodeReferencesOptTable(cr, ft, false, walkSubtree)) { /* ** The table is correct, how about the column position? @@ -278,6 +293,101 @@ } /** + * @see RelationalOperator#getOperand + */ + public ValueNode getOperand(ColumnReference cRef, + int refSetSize, boolean otherSide) + { + // Following call will initialize/reset the btnVis, + // valNodeBaseTables, and optBaseTables fields of this object. + initBaseTableVisitor(refSetSize, true); + + // We search for the column reference by getting the *base* + // table number for each operand and checking to see if + // that matches the *base* table number for the cRef + // that we're looking for. If so, then we the two + // reference the same table so we go on to check + // column position. + try { + + // Use optBaseTables for cRef's base table numbers. + btnVis.setTableMap(optBaseTables); + cRef.accept(btnVis); + + // Use valNodeBaseTables for operand base table nums. + btnVis.setTableMap(valNodeBaseTables); + + ColumnReference cr; + if (leftOperand instanceof ColumnReference) + { + /* + ** The left operand is a column reference. + ** Is it the correct column? + */ + cr = (ColumnReference) leftOperand; + cr.accept(btnVis); + valNodeBaseTables.and(optBaseTables); + if (valNodeBaseTables.getFirstSetBit() != -1) + { + /* + ** The table is correct, how about the column position? + */ + if (cr.getSource().getColumnPosition() == + cRef.getColumnNumber()) + { + /* + ** We've found the correct column - + ** return the appropriate side. + */ + if (otherSide) + return rightOperand; + return leftOperand; + } + } + } + + if (rightOperand instanceof ColumnReference) + { + /* + ** The right operand is a column reference. + ** Is it the correct column? + */ + valNodeBaseTables.clearAll(); + cr = (ColumnReference) rightOperand; + cr.accept(btnVis); + valNodeBaseTables.and(optBaseTables); + if (valNodeBaseTables.getFirstSetBit() != -1) + { + /* + ** The table is correct, how about the column position? + */ + if (cr.getSource().getColumnPosition() == + cRef.getColumnNumber()) + { + /* + ** We've found the correct column - + ** return the appropriate side + */ + if (otherSide) + return leftOperand; + return rightOperand; + } + } + } + + } catch (StandardException se) { + if (SanityManager.DEBUG) + { + SanityManager.THROWASSERT("Failed when trying to " + + "find base table number for column reference check:\n" + + se.getMessage()); + } + } + + return null; + } + + /** * @see RelationalOperator#generateExpressionOperand * * @exception StandardException Thrown on error @@ -298,8 +408,8 @@ } ft = (FromBaseTable) optTable; - ValueNode exprOp = - getExpressionOperand(ft.getTableNumber(), columnPosition); + ValueNode exprOp = getExpressionOperand( + ft.getTableNumber(), columnPosition, ft); if (SanityManager.DEBUG) { @@ -388,11 +498,8 @@ protected boolean keyColumnOnLeft(Optimizable optTable) { ColumnReference cr; - FromTable ft; boolean left = false; - ft = (FromTable) optTable; - /* Is the key column on the left or the right? */ if (leftOperand instanceof ColumnReference) { @@ -401,23 +508,25 @@ ** Is it the correct column? */ cr = (ColumnReference) leftOperand; - if (cr.getTableNumber() == ft.getTableNumber()) + if (valNodeReferencesOptTable( + cr, (FromTable)optTable, false, true)) { /* The left operand is the key column */ left = true; } } - else + + // Else the right operand must be the key column. + if (SanityManager.DEBUG) { - if (SanityManager.DEBUG) - { - SanityManager.ASSERT((rightOperand instanceof ColumnReference) && - (((ColumnReference) rightOperand).getTableNumber() == - ft.getTableNumber()), - "Key column not found on either side."); - } - /* The right operand is the key column */ - left = false; + if (!left) + { + SanityManager.ASSERT( + (rightOperand instanceof ColumnReference) && + valNodeReferencesOptTable((ColumnReference)rightOperand, + (FromTable)optTable, false, true), + "Key column not found on either side."); + } } return left; @@ -443,6 +552,7 @@ { ColumnReference cr; boolean left = false; + boolean walkSubtree = true; /* Is a column on the left */ if (leftOperand instanceof ColumnReference) @@ -452,11 +562,13 @@ ** Is it the correct column? */ cr = (ColumnReference) leftOperand; - if (cr.getTableNumber() == optTable.getTableNumber()) + if (valNodeReferencesOptTable( + cr, (FromTable)optTable, false, walkSubtree)) { /* Key column found on left */ return LEFT; } + walkSubtree = false; } if (rightOperand instanceof ColumnReference) @@ -466,7 +578,8 @@ ** Is it the correct column? */ cr = (ColumnReference) rightOperand; - if (cr.getTableNumber() == optTable.getTableNumber()) + if (valNodeReferencesOptTable( + cr, (FromTable)optTable, false, walkSubtree)) { /* Key column found on right */ return RIGHT; @@ -621,7 +734,7 @@ * * @exception StandardException Thrown on error */ - public boolean isQualifier(Optimizable optTable) + public boolean isQualifier(Optimizable optTable, boolean forPush) throws StandardException { FromTable ft; @@ -629,6 +742,7 @@ JBitSet tablesReferenced; ColumnReference cr = null; boolean found = false; + boolean walkSubtree = true; ft = (FromTable) optTable; @@ -639,11 +753,12 @@ ** Is it the correct column? */ cr = (ColumnReference) leftOperand; - if (cr.getTableNumber() == ft.getTableNumber()) + if (valNodeReferencesOptTable(cr, ft, forPush, walkSubtree)) { otherSide = rightOperand; found = true; } + walkSubtree = false; } if ( ( ! found) && (rightOperand instanceof ColumnReference) ) @@ -653,7 +768,7 @@ ** Is it the correct column? */ cr = (ColumnReference) rightOperand; - if (cr.getTableNumber() == ft.getTableNumber()) + if (valNodeReferencesOptTable(cr, ft, forPush, walkSubtree)) { otherSide = leftOperand; found = true; @@ -675,9 +790,7 @@ ** Qualifier if the other side does not refer to the table we are ** optimizing. */ - tablesReferenced = otherSide.getTablesReferenced(); - - return ! (tablesReferenced.get(ft.getTableNumber())); + return !valNodeReferencesOptTable(otherSide, ft, forPush, true); } /** @@ -1320,6 +1433,143 @@ return (ValueNode)cr.getClone(); } -} + /** + * Determine whether or not the received ValueNode (which will + * usually be a ColumnReference) references either the received + * optTable or else a base table in the subtree beneath that + * optTable. + * + * @param valNode The ValueNode that has the reference(s). + * @param optTable The table/subtree node to which we're trying + * to find a reference. + * @param forPush Whether or not we are searching with the intent + * to push this operator to the target table. + * @param walkOptTableSubtree Should we walk the subtree beneath + * optTable to find base tables, or not? Will be false if we've + * already done it for the left operand and now we're here + * for the right operand. + * @return True if valNode contains a reference to optTable or + * to a base table in the subtree beneath optTable; false + * otherwise. + */ + private boolean valNodeReferencesOptTable(ValueNode valNode, + FromTable optTable, boolean forPush, boolean walkOptTableSubtree) + { + // Following call will initialize/reset the btnVis, + // valNodeBaseTables, and optBaseTables fields of this object. + initBaseTableVisitor(optTable.getReferencedTableMap().size(), + walkOptTableSubtree); + boolean found = false; + try { + // Find all base tables beneath optTable and load them + // into this object's optBaseTables map. This is the + // list of table numbers we'll search to see if the + // value node references any tables in the subtree at + // or beneath optTable. + if (walkOptTableSubtree) + buildTableNumList(optTable, forPush); + + // Now get the base table numbers that are in valNode's + // subtree. In most cases valNode will be a ColumnReference + // and this will return a single base table number. + btnVis.setTableMap(valNodeBaseTables); + valNode.accept(btnVis); + + // And finally, see if there's anything in common. + valNodeBaseTables.and(optBaseTables); + found = (valNodeBaseTables.getFirstSetBit() != -1); + + } catch (StandardException se) { + if (SanityManager.DEBUG) + { + SanityManager.THROWASSERT("Failed when trying to " + + "find base table numbers for reference check:\n" + + se.getMessage()); + } + } + + return found; + } + + /** + * Initialize the fields used for retrieving base tables in + * subtrees, which allows us to do a more extensive search + * for table references. If the fields have already been + * created, then just reset their values. + * + * @param numTablesInQuery Used for creating JBitSets that + * can hold table numbers for the query. + * @param initOptBaseTables Whether or not we should clear out + * or initialize the optBaseTables bit set. + */ + private void initBaseTableVisitor(int numTablesInQuery, + boolean initOptBaseTables) + { + if (valNodeBaseTables == null) + valNodeBaseTables = new JBitSet(numTablesInQuery); + else + valNodeBaseTables.clearAll(); + + if (initOptBaseTables) + { + if (optBaseTables == null) + optBaseTables = new JBitSet(numTablesInQuery); + else + optBaseTables.clearAll(); + } + + // Now create the visitor. We give it valNodeBaseTables + // here for sake of creation, but this can be overridden + // (namely, by optBaseTables) by the caller of this method. + if (btnVis == null) + btnVis = new BaseTableNumbersVisitor(valNodeBaseTables); + } + + /** + * Create a set of table numbers to search when trying to find + * which (if either) of this operator's operands reference the + * received target table. At the minimum this set should contain + * the target table's own table number. After that, if we're + * _not_ attempting to push this operator (or more specifically, + * the predicate to which this operator belongs) to the target + * table, we go on to search the subtree beneath the target + * table and add any base table numbers to the searchable list. + * + * @param ft Target table for which we're building the search + * list. + * @param forPush Whether or not we are searching with the intent + * to push this operator to the target table. + */ + private void buildTableNumList(FromTable ft, boolean forPush) + throws StandardException + { + // Start with the target table's own table number. Note + // that if ft is an instanceof SingleChildResultSet, its + // table number could be negative. + if (ft.getTableNumber() >= 0) + optBaseTables.set(ft.getTableNumber()); + + if (forPush) + // nothing else to do. + return; + + // Add any table numbers from the target table's + // reference map. + optBaseTables.or(ft.getReferencedTableMap()); + + // The table's reference map is not guaranteed to have + // all of the tables that are actually used--for example, + // if the table is a ProjectRestrictNode or a JoinNode + // with a subquery as a child, the ref map will contain + // the number for the PRN above the subquery, but it + // won't contain the table numbers referenced by the + // subquery. So here we go through and find ALL base + // table numbers beneath the target node. + btnVis.setTableMap(optBaseTables); + ft.accept(btnVis); + return; + } + +} Index: java/engine/org/apache/derby/impl/sql/compile/HashJoinStrategy.java =================================================================== --- java/engine/org/apache/derby/impl/sql/compile/HashJoinStrategy.java (revision 421286) +++ java/engine/org/apache/derby/impl/sql/compile/HashJoinStrategy.java (working copy) @@ -51,6 +51,8 @@ import org.apache.derby.iapi.services.io.FormatableArrayHolder; import org.apache.derby.iapi.services.io.FormatableIntHolder; +import org.apache.derby.iapi.util.JBitSet; + import java.util.Vector; public class HashJoinStrategy extends BaseJoinStrategy { @@ -95,6 +97,60 @@ return false; } + /* If the predicate given by the user _directly_ references + * any of the base tables _beneath_ this node, then we + * cannot safely use the predicate for a hash because the + * predicate correlates two nodes at different nesting levels. + * If we did a hash join in this case, materialization of + * innerTable could lead to incorrect results--and in particular, + * results that are missing rows. We can check for this by + * looking at the predicates' reference maps, which are set based + * on the initial query (as part of pre-processing). Note that + * by the time we get here, it's possible that a predicate's + * reference map holds table numbers that do not agree with the + * table numbers of the column references used by the predicate. + * That's okay--this occurs as a result of "remapping" predicates + * that have been pushed down the query tree. And in fact + * it's a good thing because, by looking at the column reference's + * own table numbers instead of the predicate's referenced map, + * we are more readily able to find equijoin predicates that + * we otherwise would not have found. + * + * Note: do not perform this check if innerTable is a FromBaseTable + * because a base table does not have a "subtree" to speak of. + */ + if ((predList != null) && (predList.size() > 0) && + !(innerTable instanceof FromBaseTable)) + { + FromTable ft = (FromTable)innerTable; + + // First get a list of all of the base tables in the subtree + // below innerTable. + JBitSet tNums = new JBitSet(ft.getReferencedTableMap().size()); + BaseTableNumbersVisitor btnVis = new BaseTableNumbersVisitor(tNums); + ft.accept(btnVis); + + // Now get a list of all table numbers referenced by the + // join predicates that we'll be searching. + JBitSet pNums = new JBitSet(tNums.size()); + Predicate pred = null; + for (int i = 0; i < predList.size(); i++) + { + pred = (Predicate)predList.getOptPredicate(i); + if (pred.isJoinPredicate()) + pNums.or(pred.getReferencedSet()); + } + + // If tNums and pNums have anything in common, then at + // least one predicate in the list refers directly to + // a base table beneath this node (as opposed to referring + // just to this node), which means it's not safe to do a + // hash join. + tNums.and(pNums); + if (tNums.getFirstSetBit() != -1) + return false; + } + if (innerTable.isBaseTable()) { /* Must have an equijoin on a column in the conglomerate */ Index: java/engine/org/apache/derby/impl/sql/compile/OptimizerImpl.java =================================================================== --- java/engine/org/apache/derby/impl/sql/compile/OptimizerImpl.java (revision 421286) +++ java/engine/org/apache/derby/impl/sql/compile/OptimizerImpl.java (working copy) @@ -163,6 +163,24 @@ // the one that's actually going to be generated. CostEstimate finalCostEstimate; + /* Status variables used for "jumping" to previous best join + * order when possible. In particular, this helps when this + * optimizer corresponds to a subquery and we are trying to + * find out what the best join order is if we do a hash join + * with the subquery instead of a nested loop join. In that + * case the previous best join order will have the best join + * order for a nested loop, so we want to start there when + * considering hash join because odds are that same join order + * will give us the best cost for hash join, as well. We + * only try this, though, if neither the previous round of + * optimization nor this round relies on predicates that have + * been pushed down from above--because that's the scenario + * for which the best join order is likely to be same for + * consecutive rounds. + */ + private boolean usingPredsPushedFromAbove; + private boolean bestJoinOrderUsedPredsFromAbove; + protected OptimizerImpl(OptimizableList optimizableList, OptimizablePredicateList predicateList, DataDictionary dDictionary, @@ -241,6 +259,9 @@ reloadBestPlan = false; savedJoinOrders = null; timeLimit = Double.MAX_VALUE; + + usingPredsPushedFromAbove = false; + bestJoinOrderUsedPredsFromAbove = false; } /** @@ -300,7 +321,7 @@ * state to ensure that we have a chance to consider plans that * can take advantage of the pushed predicates. */ - boolean resetTimer = false; + usingPredsPushedFromAbove = false; if ((predicateList != null) && (predicateList.size() > 0)) { for (int i = predicateList.size() - 1; i >= 0; i--) @@ -310,13 +331,13 @@ if (((Predicate)predicateList. getOptPredicate(i)).isScopedForPush()) { - resetTimer = true; + usingPredsPushedFromAbove = true; break; } } } - if (resetTimer) + if (usingPredsPushedFromAbove) { timeOptimizationStarted = System.currentTimeMillis(); timeExceeded = false; @@ -377,7 +398,9 @@ } } - if (timeExceeded && bestCost.isUninitialized()) + if (bestCost.isUninitialized() && foundABestPlan && + ((!usingPredsPushedFromAbove && !bestJoinOrderUsedPredsFromAbove) + || timeExceeded)) { /* We can get here if this OptimizerImpl is for a subquery * that timed out for a previous permutation of the outer @@ -404,6 +427,17 @@ * because that's how our timeout value was set to begin with--so * if there was no best join order, we never would have timed out * and thus we wouldn't be here. + * + * We can also get here if we've already optimized the list + * of optimizables once (in a previous round of optimization) + * and now we're back to do it again. If that's true AND + * we did *not* receive any predicates pushed from above AND + * the bestJoinOrder from the previous round did *not* depend + * on predicates pushed from above, then we'll jump to the + * previous join order and start there. NOTE: if after jumping + * to the previous join order and calculating the cost we haven't + * timed out, we will continue looking at other join orders (as + * usual) until we exhaust them all or we time out. */ if (permuteState != JUMPING) { @@ -412,6 +446,8 @@ // jump to the target join order and get the cost. That // cost will then be saved as bestCost, allowing us to // proceed with normal timeout logic. + if (firstLookOrder == null) + firstLookOrder = new int[numOptimizables]; for (int i = 0; i < numOptimizables; i++) firstLookOrder[i] = bestJoinOrder[i]; permuteState = JUMPING; @@ -1512,6 +1548,7 @@ ** join order instead of in table number order, so ** we use 2 loops. */ + bestJoinOrderUsedPredsFromAbove = usingPredsPushedFromAbove; for (int i = 0; i < numOptimizables; i++) { bestJoinOrder[i] = proposedJoinOrder[i]; Index: java/engine/org/apache/derby/impl/sql/compile/PredicateList.java =================================================================== --- java/engine/org/apache/derby/impl/sql/compile/PredicateList.java (revision 421286) +++ java/engine/org/apache/derby/impl/sql/compile/PredicateList.java (working copy) @@ -405,7 +405,7 @@ ** Skip comparisons that are not qualifiers for the table ** in question. */ - if ( ! ((RelationalOperator) opNode).isQualifier(optTable)) + if ( ! ((RelationalOperator) opNode).isQualifier(optTable, false)) { continue; } @@ -547,7 +547,7 @@ } else { - if ( ! relop.isQualifier(optTable)) + if ( ! relop.isQualifier(optTable, pushPreds)) { // NOT a qualifier, go on to next predicate. continue; @@ -617,7 +617,7 @@ continue; } - if ( !isIn && ! relop.isQualifier(optTable)) + if ( !isIn && ! relop.isQualifier(optTable, pushPreds)) continue; /* Look for an index column on one side of the relop */ @@ -945,7 +945,7 @@ RelationalOperator relop = pred.getRelop(); // Transfer each non-qualifier - if (relop == null || ! relop.isQualifier(optTable)) + if (relop == null || ! relop.isQualifier(optTable, false)) { pred.clearScanFlags(); removeElementAt(index); @@ -3339,7 +3339,8 @@ ValueNode keyExp = relop.getExpressionOperand( optTable.getTableNumber(), - baseColumns[columnNumber]); + baseColumns[columnNumber], + (FromTable)optTable); if (keyExp instanceof ColumnReference) setOrderedNulls = @@ -3392,18 +3393,17 @@ for (int index = 0; index < size; index++) { Predicate pred = (Predicate) elementAt(index); - RelationalOperator relop = pred.getRelop(); - if (relop != null && - relop.getOperator() == RelationalOperator.EQUALS_RELOP) + if (relop != null) { if (relop.getOperator() == RelationalOperator.EQUALS_RELOP) { - ValueNode exprOp = relop.getExpressionOperand( - colRef.getTableNumber(), - colRef.getColumnNumber() - ); + ValueNode exprOp = relop.getOperand( + colRef, + pred.getReferencedSet().size(), + true + ); if (exprOp != null) { @@ -3417,10 +3417,12 @@ else if (relop.getOperator() == RelationalOperator.IS_NULL_RELOP) { - ColumnReference columnOp = relop.getColumnOperand( - colRef.getTableNumber(), - colRef.getColumnNumber() - ); + ColumnReference columnOp = + (ColumnReference)relop.getOperand( + colRef, + pred.getReferencedSet().size(), + false + ); if (columnOp != null) { Index: java/testing/org/apache/derbyTesting/functionTests/tests/lang/subquery.sql =================================================================== --- java/testing/org/apache/derbyTesting/functionTests/tests/lang/subquery.sql (revision 421286) +++ java/testing/org/apache/derbyTesting/functionTests/tests/lang/subquery.sql (working copy) @@ -437,3 +437,91 @@ drop table t1; drop table t3; +-- DERBY-781: Materialize subqueries where possible to avoid creating +-- invariant result sets many times. This test case executes a query +-- that has subqueries twice: the first time the tables have only a +-- few rows in them; the second time they have hundreds of rows in +-- them. + +create table t1 (i int, j int); +create table t2 (i int, j int); + +insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +insert into t2 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); + +create table t3 (a int, b int); +create table t4 (a int, b int); + +insert into t3 values (2, 2), (4, 4), (5, 5); +insert into t4 values (2, 2), (4, 4), (5, 5); + +-- Use of the term "DISTINCT" makes it so that we don't flatten +-- the subqueries. +create view V1 as select distinct T1.i, T2.j from T1, T2 where T1.i = T2.i; +create view V2 as select distinct T3.a, T4.b from T3, T4 where T3.a = T4.a; + +call SYSCS_UTIL.SYSCS_SET_RUNTIMESTATISTICS(1); +maximumdisplaywidth 20000; + +-- Run the test query the first time, with only a small number +-- of rows in each table. Before the patch for DERBY-781 +-- the optimizer would have chosen a nested loop join, which +-- means that we would generate the result set for the inner +-- view multiple times. After DERBY-781 the optimizer will +-- choose to do a hash join and thereby materialize the inner +-- result set, thus improving performance. Should see a +-- Hash join as the top-level join with a HashTableResult as +-- the right child of the outermost join. +select * from V1, V2 where V1.j = V2.b and V1.i in (1,2,3,4,5); +values SYSCS_UTIL.SYSCS_GET_RUNTIMESTATISTICS(); + +-- Now add more data to the tables. + +insert into t1 select * from t2; +insert into t2 select * from t1; +insert into t2 select * from t1; +insert into t1 select * from t2; +insert into t2 select * from t1; +insert into t1 select * from t2; +insert into t2 select * from t1; +insert into t1 select * from t2; +insert into t2 select * from t1; +insert into t1 select * from t2; + +insert into t3 select * from t4; +insert into t4 select * from t3; +insert into t3 select * from t4; +insert into t4 select * from t3; +insert into t3 select * from t4; +insert into t4 select * from t3; +insert into t3 select * from t4; +insert into t4 select * from t3; +insert into t3 select * from t4; +insert into t4 select * from t3; +insert into t3 select * from t4; + +-- Drop the views and recreate them with slightly different +-- names. The reason we use different names is to ensure that +-- the query will be "different" from the last time and thus we'll +-- we'll go through optimization again (instead of just using +-- the cached plan from last time). +drop view v1; +drop view v2; + +-- Use of the term "DISTINCT" makes it so that we don't flatten +-- the subqueries. +create view VV1 as select distinct T1.i, T2.j from T1, T2 where T1.i = T2.i; +create view VV2 as select distinct T3.a, T4.b from T3, T4 where T3.a = T4.a; + +-- Now execute the query again using the larger tables. +select * from VV1, VV2 where VV1.j = VV2.b and VV1.i in (1,2,3,4,5); +values SYSCS_UTIL.SYSCS_GET_RUNTIMESTATISTICS(); + +-- clean up. +call SYSCS_UTIL.SYSCS_SET_RUNTIMESTATISTICS(0); +drop view vv1; +drop view vv2; +drop table t1; +drop table t2; +drop table t3; +drop table t4; Index: java/testing/org/apache/derbyTesting/functionTests/master/predicatesIntoViews.out =================================================================== --- java/testing/org/apache/derbyTesting/functionTests/master/predicatesIntoViews.out (revision 421286) +++ java/testing/org/apache/derbyTesting/functionTests/master/predicatesIntoViews.out (working copy) @@ -6887,7 +6887,7 @@ next time (milliseconds) = 0 close time (milliseconds) = 0 Left result set: - Nested Loop Left Outer Join ResultSet: + Hash Left Outer Join ResultSet: Number of opens = 1 Rows seen from the left = 0 Rows seen from the right = 0 @@ -6947,7 +6947,7 @@ next time (milliseconds) = 0 close time (milliseconds) = 0 Left result set: - Nested Loop Left Outer Join ResultSet: + Hash Left Outer Join ResultSet: Number of opens = 1 Rows seen from the left = 0 Rows seen from the right = 0 @@ -7043,18 +7043,22 @@ Unknown return value: false Negate comparison result: false Right result set: - Project-Restrict ResultSet (18): + Hash Table ResultSet (18): Number of opens = 0 + Hash table size = 0 + Hash key is column number 0 Rows seen = 0 Rows filtered = 0 - restriction = true - projection = true constructor time (milliseconds) = 0 open time (milliseconds) = 0 next time (milliseconds) = 0 close time (milliseconds) = 0 - restriction time (milliseconds) = 0 - projection time (milliseconds) = 0 + next qualifiers: +Column[0][0] Id: 0 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false Source result set: Project-Restrict ResultSet (17): Number of opens = 0 @@ -7207,18 +7211,22 @@ Unknown return value: false Negate comparison result: false Right result set: - Project-Restrict ResultSet (27): + Hash Table ResultSet (27): Number of opens = 0 + Hash table size = 0 + Hash key is column number 0 Rows seen = 0 Rows filtered = 0 - restriction = true - projection = true constructor time (milliseconds) = 0 open time (milliseconds) = 0 next time (milliseconds) = 0 close time (milliseconds) = 0 - restriction time (milliseconds) = 0 - projection time (milliseconds) = 0 + next qualifiers: +Column[0][0] Id: 0 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false Source result set: Project-Restrict ResultSet (26): Number of opens = 0 Index: java/testing/org/apache/derbyTesting/functionTests/master/subquery.out =================================================================== --- java/testing/org/apache/derbyTesting/functionTests/master/subquery.out (revision 421286) +++ java/testing/org/apache/derbyTesting/functionTests/master/subquery.out (working copy) @@ -948,7 +948,7 @@ restriction time (milliseconds) = 0 projection time (milliseconds) = 0 Source result set: - Nested Loop Join ResultSet: + Hash Join ResultSet: Number of opens = 1 Rows seen from the left = 5 Rows seen from the right = 4 @@ -984,24 +984,29 @@ next qualifiers: None Right result set: - Project-Restrict ResultSet (4): + Hash Table ResultSet (4): Number of opens = 5 - Rows seen = 45 - Rows filtered = 41 - restriction = true - projection = true + Hash table size = 9 + Hash key is column number 0 + Rows seen = 9 + Rows filtered = 0 constructor time (milliseconds) = 0 open time (milliseconds) = 0 next time (milliseconds) = 0 close time (milliseconds) = 0 - restriction time (milliseconds) = 0 - projection time (milliseconds) = 0 + next time in milliseconds/row = 0 + next qualifiers: +Column[0][0] Id: 0 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false Source result set: Distinct Scan ResultSet for T3 at read committed isolation level using instantaneous share row locking: - Number of opens = 5 + Number of opens = 1 Hash table size = 9 Distinct columns are column numbers (0,1) - Rows seen = 45 + Rows seen = 9 Rows filtered = 0 constructor time (milliseconds) = 0 open time (milliseconds) = 0 @@ -1028,4 +1033,628 @@ 0 rows inserted/updated/deleted ij> drop table t3; 0 rows inserted/updated/deleted +ij> -- DERBY-781: Materialize subqueries where possible to avoid creating +-- invariant result sets many times. This test case executes a query +-- that has subqueries twice: the first time the tables have only a +-- few rows in them; the second time they have hundreds of rows in +-- them. +create table t1 (i int, j int); +0 rows inserted/updated/deleted +ij> create table t2 (i int, j int); +0 rows inserted/updated/deleted +ij> insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +5 rows inserted/updated/deleted +ij> insert into t2 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +5 rows inserted/updated/deleted +ij> create table t3 (a int, b int); +0 rows inserted/updated/deleted +ij> create table t4 (a int, b int); +0 rows inserted/updated/deleted +ij> insert into t3 values (2, 2), (4, 4), (5, 5); +3 rows inserted/updated/deleted +ij> insert into t4 values (2, 2), (4, 4), (5, 5); +3 rows inserted/updated/deleted +ij> -- Use of the term "DISTINCT" makes it so that we don't flatten +-- the subqueries. +create view V1 as select distinct T1.i, T2.j from T1, T2 where T1.i = T2.i; +0 rows inserted/updated/deleted +ij> create view V2 as select distinct T3.a, T4.b from T3, T4 where T3.a = T4.a; +0 rows inserted/updated/deleted +ij> call SYSCS_UTIL.SYSCS_SET_RUNTIMESTATISTICS(1); +0 rows inserted/updated/deleted +ij> maximumdisplaywidth 20000; +ij> -- Run the test query the first time, with only a small number +-- of rows in each table. Before the patch for DERBY-781 +-- the optimizer would have chosen a nested loop join, which +-- means that we would generate the result set for the inner +-- view multiple times. After DERBY-781 the optimizer will +-- choose to do a hash join and thereby materialize the inner +-- result set, thus improving performance. Should see a +-- Hash join as the top-level join with a HashTableResult as +-- the right child of the outermost join. +select * from V1, V2 where V1.j = V2.b and V1.i in (1,2,3,4,5); +I |J |A |B +----------------------------------------------- +2 |2 |2 |2 +4 |4 |4 |4 +5 |5 |5 |5 +ij> values SYSCS_UTIL.SYSCS_GET_RUNTIMESTATISTICS(); +1 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +Statement Name: + null +Statement Text: + -- Run the test query the first time, with only a small number +-- of rows in each table. Before the patch for DERBY-781 +-- the optimizer would have chosen a nested loop join, which +-- means that we would generate the result set for the inner +-- view multiple times. After DERBY-781 the optimizer will +-- choose to do a hash join and thereby materialize the inner +-- result set, thus improving performance. Should see a +-- Hash join as the top-level join with a HashTableResult as +-- the right child of the outermost join. +select * from V1, V2 where V1.j = V2.b and V1.i in (1,2,3,4,5) +Parse Time: 0 +Bind Time: 0 +Optimize Time: 0 +Generate Time: 0 +Compile Time: 0 +Execute Time: 0 +Begin Compilation Timestamp : null +End Compilation Timestamp : null +Begin Execution Timestamp : null +End Execution Timestamp : null +Statement Execution Plan Text: +Hash Join ResultSet: +Number of opens = 1 +Rows seen from the left = 5 +Rows seen from the right = 3 +Rows filtered = 0 +Rows returned = 3 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 +Left result set: + Sort ResultSet: + Number of opens = 1 + Rows input = 5 + Rows returned = 5 + Eliminate duplicates = true + In sorted order = false + Sort information: + Number of rows input=5 + Number of rows output=5 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + Source result set: + Project-Restrict ResultSet (7): + Number of opens = 1 + Rows seen = 5 + Rows filtered = 0 + restriction = false + projection = true + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + restriction time (milliseconds) = 0 + projection time (milliseconds) = 0 + Source result set: + Nested Loop Join ResultSet: + Number of opens = 1 + Rows seen from the left = 5 + Rows seen from the right = 5 + Rows filtered = 0 + Rows returned = 5 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + Left result set: + Project-Restrict ResultSet (5): + Number of opens = 1 + Rows seen = 5 + Rows filtered = 0 + restriction = true + projection = false + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + restriction time (milliseconds) = 0 + projection time (milliseconds) = 0 + Source result set: + Table Scan ResultSet for T1 at read committed isolation level using share row locking chosen by the optimizer + Number of opens = 1 + Rows seen = 5 + Rows filtered = 0 + Fetch Size = 1 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + scan information: + Bit set of columns fetched={0} + Number of columns fetched=1 + Number of pages visited=1 + Number of rows qualified=5 + Number of rows visited=5 + Scan type=heap + start position: +null stop position: +null qualifiers: +Column[0][0] Id: 0 +Operator: < +Ordered nulls: false +Unknown return value: true +Negate comparison result: true +Column[0][1] Id: 0 +Operator: <= +Ordered nulls: false +Unknown return value: false +Negate comparison result: false + Right result set: + Table Scan ResultSet for T2 at read committed isolation level using share row locking chosen by the optimizer + Number of opens = 5 + Rows seen = 5 + Rows filtered = 0 + Fetch Size = 1 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + scan information: + Bit set of columns fetched=All + Number of columns fetched=2 + Number of pages visited=1 + Number of rows qualified=5 + Number of rows visited=25 + Scan type=heap + start position: +null stop position: +null qualifiers: +Column[0][0] Id: 0 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false +Right result set: + Hash Table ResultSet (13): + Number of opens = 5 + Hash table size = 3 + Hash key is column number 1 + Rows seen = 3 + Rows filtered = 0 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + next qualifiers: +Column[0][0] Id: 1 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false + Source result set: + Sort ResultSet: + Number of opens = 1 + Rows input = 3 + Rows returned = 3 + Eliminate duplicates = true + In sorted order = false + Sort information: + Number of rows input=3 + Number of rows output=3 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + Source result set: + Project-Restrict ResultSet (12): + Number of opens = 1 + Rows seen = 3 + Rows filtered = 0 + restriction = false + projection = true + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + restriction time (milliseconds) = 0 + projection time (milliseconds) = 0 + Source result set: + Hash Join ResultSet: + Number of opens = 1 + Rows seen from the left = 3 + Rows seen from the right = 3 + Rows filtered = 0 + Rows returned = 3 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + Left result set: + Table Scan ResultSet for T3 at read committed isolation level using share row locking chosen by the optimizer + Number of opens = 1 + Rows seen = 3 + Rows filtered = 0 + Fetch Size = 1 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + scan information: + Bit set of columns fetched={0} + Number of columns fetched=1 + Number of pages visited=1 + Number of rows qualified=3 + Number of rows visited=3 + Scan type=heap + start position: +null stop position: +null qualifiers: +None + Right result set: + Hash Scan ResultSet for T4 at read committed isolation level using instantaneous share row locking: + Number of opens = 3 + Hash table size = 3 + Hash key is column number 0 + Rows seen = 3 + Rows filtered = 0 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + scan information: + Bit set of columns fetched=All + Number of columns fetched=2 + Number of pages visited=1 + Number of rows qualified=3 + Number of rows visited=3 + Scan type=heap + start position: +null stop position: +null scan qualifiers: +None + next qualifiers: +Column[0][0] Id: 0 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false +ij> -- Now add more data to the tables. +insert into t1 select * from t2; +5 rows inserted/updated/deleted +ij> insert into t2 select * from t1; +10 rows inserted/updated/deleted +ij> insert into t2 select * from t1; +10 rows inserted/updated/deleted +ij> insert into t1 select * from t2; +25 rows inserted/updated/deleted +ij> insert into t2 select * from t1; +35 rows inserted/updated/deleted +ij> insert into t1 select * from t2; +60 rows inserted/updated/deleted +ij> insert into t2 select * from t1; +95 rows inserted/updated/deleted +ij> insert into t1 select * from t2; +155 rows inserted/updated/deleted +ij> insert into t2 select * from t1; +250 rows inserted/updated/deleted +ij> insert into t1 select * from t2; +405 rows inserted/updated/deleted +ij> insert into t3 select * from t4; +3 rows inserted/updated/deleted +ij> insert into t4 select * from t3; +6 rows inserted/updated/deleted +ij> insert into t3 select * from t4; +9 rows inserted/updated/deleted +ij> insert into t4 select * from t3; +15 rows inserted/updated/deleted +ij> insert into t3 select * from t4; +24 rows inserted/updated/deleted +ij> insert into t4 select * from t3; +39 rows inserted/updated/deleted +ij> insert into t3 select * from t4; +63 rows inserted/updated/deleted +ij> insert into t4 select * from t3; +102 rows inserted/updated/deleted +ij> insert into t3 select * from t4; +165 rows inserted/updated/deleted +ij> insert into t4 select * from t3; +267 rows inserted/updated/deleted +ij> insert into t3 select * from t4; +432 rows inserted/updated/deleted +ij> -- Drop the views and recreate them with slightly different +-- names. The reason we use different names is to ensure that +-- the query will be "different" from the last time and thus we'll +-- we'll go through optimization again (instead of just using +-- the cached plan from last time). +drop view v1; +0 rows inserted/updated/deleted +ij> drop view v2; +0 rows inserted/updated/deleted +ij> -- Use of the term "DISTINCT" makes it so that we don't flatten +-- the subqueries. +create view VV1 as select distinct T1.i, T2.j from T1, T2 where T1.i = T2.i; +0 rows inserted/updated/deleted +ij> create view VV2 as select distinct T3.a, T4.b from T3, T4 where T3.a = T4.a; +0 rows inserted/updated/deleted +ij> -- Now execute the query again using the larger tables. +select * from VV1, VV2 where VV1.j = VV2.b and VV1.i in (1,2,3,4,5); +I |J |A |B +----------------------------------------------- +2 |2 |2 |2 +4 |4 |4 |4 +5 |5 |5 |5 +ij> values SYSCS_UTIL.SYSCS_GET_RUNTIMESTATISTICS(); +1 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +Statement Name: + null +Statement Text: + -- Now execute the query again using the larger tables. +select * from VV1, VV2 where VV1.j = VV2.b and VV1.i in (1,2,3,4,5) +Parse Time: 0 +Bind Time: 0 +Optimize Time: 0 +Generate Time: 0 +Compile Time: 0 +Execute Time: 0 +Begin Compilation Timestamp : null +End Compilation Timestamp : null +Begin Execution Timestamp : null +End Execution Timestamp : null +Statement Execution Plan Text: +Hash Join ResultSet: +Number of opens = 1 +Rows seen from the left = 5 +Rows seen from the right = 3 +Rows filtered = 0 +Rows returned = 3 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 +Left result set: + Sort ResultSet: + Number of opens = 1 + Rows input = 53055 + Rows returned = 5 + Eliminate duplicates = true + In sorted order = false + Sort information: + Number of rows input=53055 + Number of rows output=5 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + Source result set: + Project-Restrict ResultSet (7): + Number of opens = 1 + Rows seen = 53055 + Rows filtered = 0 + restriction = false + projection = true + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + restriction time (milliseconds) = 0 + projection time (milliseconds) = 0 + Source result set: + Hash Join ResultSet: + Number of opens = 1 + Rows seen from the left = 655 + Rows seen from the right = 53055 + Rows filtered = 0 + Rows returned = 53055 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + Left result set: + Project-Restrict ResultSet (5): + Number of opens = 1 + Rows seen = 655 + Rows filtered = 0 + restriction = true + projection = false + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + restriction time (milliseconds) = 0 + projection time (milliseconds) = 0 + Source result set: + Table Scan ResultSet for T1 at read committed isolation level using share row locking chosen by the optimizer + Number of opens = 1 + Rows seen = 655 + Rows filtered = 0 + Fetch Size = 1 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + scan information: + Bit set of columns fetched={0} + Number of columns fetched=1 + Number of pages visited=6 + Number of rows qualified=655 + Number of rows visited=655 + Scan type=heap + start position: +null stop position: +null qualifiers: +Column[0][0] Id: 0 +Operator: <= +Ordered nulls: false +Unknown return value: false +Negate comparison result: false +Column[0][1] Id: 0 +Operator: < +Ordered nulls: false +Unknown return value: true +Negate comparison result: true + Right result set: + Hash Scan ResultSet for T2 at read committed isolation level using instantaneous share row locking: + Number of opens = 655 + Hash table size = 5 + Hash key is column number 0 + Rows seen = 53055 + Rows filtered = 0 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + scan information: + Bit set of columns fetched=All + Number of columns fetched=2 + Number of pages visited=4 + Number of rows qualified=405 + Number of rows visited=405 + Scan type=heap + start position: +null stop position: +null scan qualifiers: +None + next qualifiers: +Column[0][0] Id: 0 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false +Right result set: + Hash Table ResultSet (13): + Number of opens = 5 + Hash table size = 3 + Hash key is column number 1 + Rows seen = 3 + Rows filtered = 0 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + next qualifiers: +Column[0][0] Id: 1 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false + Source result set: + Sort ResultSet: + Number of opens = 1 + Rows input = 100656 + Rows returned = 3 + Eliminate duplicates = true + In sorted order = false + Sort information: + Number of rows input=100656 + Number of rows output=3 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + Source result set: + Project-Restrict ResultSet (12): + Number of opens = 1 + Rows seen = 100656 + Rows filtered = 0 + restriction = false + projection = true + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + restriction time (milliseconds) = 0 + projection time (milliseconds) = 0 + Source result set: + Hash Join ResultSet: + Number of opens = 1 + Rows seen from the left = 432 + Rows seen from the right = 100656 + Rows filtered = 0 + Rows returned = 100656 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + Left result set: + Table Scan ResultSet for T4 at read committed isolation level using share row locking chosen by the optimizer + Number of opens = 1 + Rows seen = 432 + Rows filtered = 0 + Fetch Size = 1 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + scan information: + Bit set of columns fetched=All + Number of columns fetched=2 + Number of pages visited=4 + Number of rows qualified=432 + Number of rows visited=432 + Scan type=heap + start position: +null stop position: +null qualifiers: +None + Right result set: + Hash Scan ResultSet for T3 at read committed isolation level using instantaneous share row locking: + Number of opens = 432 + Hash table size = 3 + Hash key is column number 0 + Rows seen = 100656 + Rows filtered = 0 + constructor time (milliseconds) = 0 + open time (milliseconds) = 0 + next time (milliseconds) = 0 + close time (milliseconds) = 0 + next time in milliseconds/row = 0 + scan information: + Bit set of columns fetched={0} + Number of columns fetched=1 + Number of pages visited=6 + Number of rows qualified=699 + Number of rows visited=699 + Scan type=heap + start position: +null stop position: +null scan qualifiers: +None + next qualifiers: +Column[0][0] Id: 0 +Operator: = +Ordered nulls: false +Unknown return value: false +Negate comparison result: false +ij> -- clean up. +call SYSCS_UTIL.SYSCS_SET_RUNTIMESTATISTICS(0); +0 rows inserted/updated/deleted +ij> drop view vv1; +0 rows inserted/updated/deleted +ij> drop view vv2; +0 rows inserted/updated/deleted +ij> drop table t1; +0 rows inserted/updated/deleted +ij> drop table t2; +0 rows inserted/updated/deleted +ij> drop table t3; +0 rows inserted/updated/deleted +ij> drop table t4; +0 rows inserted/updated/deleted ij>