diff --git pom.xml pom.xml index 15c2805..17d1801 100644 --- pom.xml +++ pom.xml @@ -103,7 +103,7 @@ 3.4 1.7.7 0.8.0.RELEASE - 1.3.0-incubating + 1.4.0-incubating-SNAPSHOT 3.2.6 3.2.10 3.2.9 diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index 5a5954d..0e282b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -278,29 +278,6 @@ private static SqlKind reverse(SqlKind kind) { } } - private static SqlOperator op(SqlKind kind, SqlOperator operator) { - switch (kind) { - case EQUALS: - return SqlStdOperatorTable.EQUALS; - case NOT_EQUALS: - return SqlStdOperatorTable.NOT_EQUALS; - case GREATER_THAN: - return SqlStdOperatorTable.GREATER_THAN; - case GREATER_THAN_OR_EQUAL: - return SqlStdOperatorTable.GREATER_THAN_OR_EQUAL; - case LESS_THAN: - return SqlStdOperatorTable.LESS_THAN; - case LESS_THAN_OR_EQUAL: - return SqlStdOperatorTable.LESS_THAN_OR_EQUAL; - case IS_DISTINCT_FROM: - return SqlStdOperatorTable.IS_DISTINCT_FROM; - case IS_NOT_DISTINCT_FROM: - return SqlStdOperatorTable.IS_NOT_DISTINCT_FROM; - default: - return operator; - } - } - private static void addJoinKey( List joinKeyList, RexNode key, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java index 18d2838..1df6542 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java @@ -22,6 +22,7 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.Sort; @@ -49,6 +50,24 @@ public HiveSort(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, offset, fetch); } + /** + * Creates a HiveSort. + * + * @param input Input relational expression + * @param collation array of sort specifications + * @param offset Expression for number of rows to discard before returning + * first row + * @param fetch Expression for number of rows to fetch + */ + public static HiveSort create(RelNode input, RelCollation collation, + RexNode offset, RexNode fetch) { + RelOptCluster cluster = input.getCluster(); + collation = RelCollationTraitDef.INSTANCE.canonize(collation); + RelTraitSet traitSet = + TraitsUtil.getSortTraitSet(cluster, input.getTraitSet(), collation); + return new HiveSort(cluster, traitSet, input, collation, offset, fetch); + } + @Override public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation, RexNode offset, RexNode fetch) { @@ -77,9 +96,15 @@ public void implement(Implementor implementor) { private static class HiveSortRelFactory implements RelFactories.SortFactory { @Override - public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation, + public RelNode createSort(RelTraitSet traits, RelNode input, RelCollation collation, RexNode offset, RexNode fetch) { - return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch); + return createSort(input, collation, offset, fetch); + } + + @Override + public RelNode createSort(RelNode input, RelCollation collation, RexNode offset, + RexNode fetch) { + return create(input, collation, offset, fetch); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java index fd8f5cb..ac72ee5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java @@ -49,14 +49,14 @@ operand(HiveProject.class, any()), operand(HiveProject.class, any())), "JoinProjectTransposeRule(Project-Project)", - HiveProject.DEFAULT_PROJECT_FACTORY); + false, HiveProject.DEFAULT_PROJECT_FACTORY); public static final HiveJoinProjectTransposeRule LEFT_PROJECT = new HiveJoinProjectTransposeRule( operand(HiveJoin.class, some(operand(HiveProject.class, any()))), "JoinProjectTransposeRule(Project-Other)", - HiveProject.DEFAULT_PROJECT_FACTORY); + false, HiveProject.DEFAULT_PROJECT_FACTORY); public static final HiveJoinProjectTransposeRule RIGHT_PROJECT = new HiveJoinProjectTransposeRule( @@ -65,219 +65,37 @@ operand(RelNode.class, any()), operand(HiveProject.class, any())), "JoinProjectTransposeRule(Other-Project)", - HiveProject.DEFAULT_PROJECT_FACTORY); + false, HiveProject.DEFAULT_PROJECT_FACTORY); - private final ProjectFactory projectFactory; - - - private HiveJoinProjectTransposeRule( - RelOptRuleOperand operand, - String description, ProjectFactory pFactory) { - super(operand, description, pFactory); - this.projectFactory = pFactory; - } - - @Override - public void onMatch(RelOptRuleCall call) { - Join joinRel = call.rel(0); - JoinRelType joinType = joinRel.getJoinType(); - - Project leftProj; - Project rightProj; - RelNode leftJoinChild; - RelNode rightJoinChild; - - // see if at least one input's projection doesn't generate nulls - if (hasLeftChild(call)) { - leftProj = call.rel(1); - leftJoinChild = getProjectChild(call, leftProj, true); - } else { - leftProj = null; - leftJoinChild = call.rel(1); - } - if (hasRightChild(call)) { - rightProj = getRightChild(call); - rightJoinChild = getProjectChild(call, rightProj, false); - } else { - rightProj = null; - rightJoinChild = joinRel.getRight(); - } - if ((leftProj == null) && (rightProj == null)) { - return; - } - - // Construct two RexPrograms and combine them. The bottom program - // is a join of the projection expressions from the left and/or - // right projects that feed into the join. The top program contains - // the join condition. - - // Create a row type representing a concatenation of the inputs - // underneath the projects that feed into the join. This is the input - // into the bottom RexProgram. Note that the join type is an inner - // join because the inputs haven't actually been joined yet. - RelDataType joinChildrenRowType = - Join.deriveJoinRowType( - leftJoinChild.getRowType(), - rightJoinChild.getRowType(), - JoinRelType.INNER, - joinRel.getCluster().getTypeFactory(), - null, - Collections.emptyList()); - - // Create projection expressions, combining the projection expressions - // from the projects that feed into the join. For the RHS projection - // expressions, shift them to the right by the number of fields on - // the LHS. If the join input was not a projection, simply create - // references to the inputs. - int nProjExprs = joinRel.getRowType().getFieldCount(); - List> projects = - new ArrayList>(); - RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); - - createProjectExprs( - leftProj, - leftJoinChild, - 0, - rexBuilder, - joinChildrenRowType.getFieldList(), - projects); - - List leftFields = - leftJoinChild.getRowType().getFieldList(); - int nFieldsLeft = leftFields.size(); - createProjectExprs( - rightProj, - rightJoinChild, - nFieldsLeft, - rexBuilder, - joinChildrenRowType.getFieldList(), - projects); - - List projTypes = new ArrayList(); - for (int i = 0; i < nProjExprs; i++) { - projTypes.add(projects.get(i).left.getType()); - } - RelDataType projRowType = - rexBuilder.getTypeFactory().createStructType( - projTypes, - Pair.right(projects)); - - // create the RexPrograms and merge them - RexProgram bottomProgram = - RexProgram.create( - joinChildrenRowType, - Pair.left(projects), - null, - projRowType, - rexBuilder); - RexProgramBuilder topProgramBuilder = - new RexProgramBuilder( - projRowType, - rexBuilder); - topProgramBuilder.addIdentity(); - topProgramBuilder.addCondition(joinRel.getCondition()); - RexProgram topProgram = topProgramBuilder.getProgram(); - RexProgram mergedProgram = - RexProgramBuilder.mergePrograms( - topProgram, - bottomProgram, - rexBuilder); + public static final HiveJoinProjectTransposeRule BOTH_PROJECT_INCLUDE_OUTER = + new HiveJoinProjectTransposeRule( + operand(HiveJoin.class, + operand(HiveProject.class, any()), + operand(HiveProject.class, any())), + "Join(IncludingOuter)ProjectTransposeRule(Project-Project)", + true, HiveProject.DEFAULT_PROJECT_FACTORY); - // expand out the join condition and construct a new LogicalJoin that - // directly references the join children without the intervening - // ProjectRels - RexNode newCondition = - mergedProgram.expandLocalRef( - mergedProgram.getCondition()); - Join newJoinRel = - joinRel.copy(joinRel.getTraitSet(), newCondition, - leftJoinChild, rightJoinChild, joinRel.getJoinType(), - joinRel.isSemiJoinDone()); + public static final HiveJoinProjectTransposeRule LEFT_PROJECT_INCLUDE_OUTER = + new HiveJoinProjectTransposeRule( + operand(HiveJoin.class, + some(operand(HiveProject.class, any()))), + "Join(IncludingOuter)ProjectTransposeRule(Project-Other)", + true, HiveProject.DEFAULT_PROJECT_FACTORY); - // expand out the new projection expressions; if the join is an - // outer join, modify the expressions to reference the join output - List newProjExprs = new ArrayList(); - List projList = mergedProgram.getProjectList(); - List newJoinFields = - newJoinRel.getRowType().getFieldList(); - int nJoinFields = newJoinFields.size(); - int[] adjustments = new int[nJoinFields]; - for (int i = 0; i < nProjExprs; i++) { - RexNode newExpr = mergedProgram.expandLocalRef(projList.get(i)); - if (joinType != JoinRelType.INNER) { - newExpr = - newExpr.accept( - new RelOptUtil.RexInputConverter( - rexBuilder, - joinChildrenRowType.getFieldList(), - newJoinFields, - adjustments)); - } - newProjExprs.add(newExpr); - } + public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_INCLUDE_OUTER = + new HiveJoinProjectTransposeRule( + operand( + HiveJoin.class, + operand(RelNode.class, any()), + operand(HiveProject.class, any())), + "Join(IncludingOuter)ProjectTransposeRule(Other-Project)", + true, HiveProject.DEFAULT_PROJECT_FACTORY); - // finally, create the projection on top of the join - RelNode newProjRel = projectFactory.createProject(newJoinRel, newProjExprs, - joinRel.getRowType().getFieldNames()); - call.transformTo(newProjRel); + private HiveJoinProjectTransposeRule( + RelOptRuleOperand operand, String description, + boolean includeOuter, ProjectFactory projectFactory) { + super(operand, description, includeOuter, projectFactory); } - /** - * Creates projection expressions corresponding to one of the inputs into - * the join - * - * @param projRel the projection input into the join (if it exists) - * @param joinChild the child of the projection input (if there is a - * projection); otherwise, this is the join input - * @param adjustmentAmount the amount the expressions need to be shifted by - * @param rexBuilder rex builder - * @param joinChildrenFields concatenation of the fields from the left and - * right join inputs (once the projections have been - * removed) - * @param projects Projection expressions & names to be created - */ - private void createProjectExprs( - Project projRel, - RelNode joinChild, - int adjustmentAmount, - RexBuilder rexBuilder, - List joinChildrenFields, - List> projects) { - List childFields = - joinChild.getRowType().getFieldList(); - if (projRel != null) { - List> namedProjects = - projRel.getNamedProjects(); - int nChildFields = childFields.size(); - int[] adjustments = new int[nChildFields]; - for (int i = 0; i < nChildFields; i++) { - adjustments[i] = adjustmentAmount; - } - for (Pair pair : namedProjects) { - RexNode e = pair.left; - if (adjustmentAmount != 0) { - // shift the references by the adjustment amount - e = e.accept( - new RelOptUtil.RexInputConverter( - rexBuilder, - childFields, - joinChildrenFields, - adjustments)); - } - projects.add(Pair.of(e, pair.right)); - } - } else { - // no projection; just create references to the inputs - for (int i = 0; i < childFields.size(); i++) { - final RelDataTypeField field = childFields.get(i); - projects.add( - Pair.of( - (RexNode) rexBuilder.makeInputRef( - field.getType(), - i + adjustmentAmount), - field.getName())); - } - } - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 29deed9..2222e07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -22,7 +22,9 @@ import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.metadata.RelMetadataQuery; @@ -34,6 +36,8 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.util.Util; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hive.common.util.AnnotationUtils; @@ -52,6 +56,11 @@ * and applies them appropriately. */ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { + + private static final Log LOG = LogFactory + .getLog(HiveJoinPushTransitivePredicatesRule.class.getName()); + + private final RelFactories.FilterFactory filterFactory; /** The singleton. */ @@ -81,16 +90,29 @@ public HiveJoinPushTransitivePredicatesRule(Class clazz, return; } + RelNode lCurr = lChild; if (leftPreds.size() > 0) { - RelNode curr = lChild; - lChild = filterFactory.createFilter(lChild, RexUtil.composeConjunction(rB, leftPreds, false)); - call.getPlanner().onCopy(curr, lChild); + RexNode condition = RexUtil.composeConjunction(rB, leftPreds, false); + if (!(lCurr instanceof Filter) || !((Filter)lCurr).getCondition().toString(). + equals(condition.toString())) { + lChild = filterFactory.createFilter(lChild, condition); + call.getPlanner().onCopy(lCurr, lChild); + } } + RelNode rCurr = rChild; if (rightPreds.size() > 0) { - RelNode curr = rChild; - rChild = filterFactory.createFilter(rChild, RexUtil.composeConjunction(rB, rightPreds, false)); - call.getPlanner().onCopy(curr, rChild); + RexNode condition = RexUtil.composeConjunction(rB, rightPreds, false); + if (!(rCurr instanceof Filter) || !((Filter)rCurr).getCondition().toString(). + equals(condition.toString())) { + rChild = filterFactory.createFilter(rChild, condition); + call.getPlanner().onCopy(rCurr, rChild); + } + } + + if (lCurr == lChild && rCurr == rChild) { + // Nothing to do + return; } RelNode newRel = join.copy(join.getTraitSet(), join.getCondition(), diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f26d1df..73ae7c4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -914,8 +914,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { // 6.1. Merge join into multijoin operators (if possible) calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), - HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT, - HiveJoinProjectTransposeRule.LEFT_PROJECT, HiveJoinProjectTransposeRule.RIGHT_PROJECT, + HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER, + HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER, + HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER, HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE); // The previous rules can pull up projections through join operators, // thus we run the field trimmer again to push them back down diff --git ql/src/test/results/clientpositive/auto_join13.q.out ql/src/test/results/clientpositive/auto_join13.q.out index 952dbf8..aa05254 100644 --- ql/src/test/results/clientpositive/auto_join13.q.out +++ ql/src/test/results/clientpositive/auto_join13.q.out @@ -21,18 +21,18 @@ JOIN ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -41,17 +41,20 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 200.0) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(key) < 200.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 UDFToDouble(_col0) (type: double) - $hdt$_0:$hdt$_1:src + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: double) + 1 _col2 (type: double) + $hdt$_0:$hdt$_1:$hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -67,7 +70,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -88,30 +91,34 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col2) + UDFToDouble(_col0)) is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col2,_col1) (type: int) - outputColumnNames: _col0 + Select Operator + expressions: _col2 (type: string), _col1 (type: string), (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: double) + 1 _col2 (type: double) + outputColumnNames: _col1, _col2 Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - mode: hash + Select Operator + expressions: hash(_col1,_col2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index e09057a..86ad261 100644 --- ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -283,32 +283,35 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) = 1.0) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_int (type: int), c_float (type: float) - outputColumnNames: _col1, _col2 + expressions: key (type: string), c_int (type: int), c_float (type: float), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '1.0' (type: string) - sort order: + - Map-reduce partition columns: '1.0' (type: string) - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: float) + Filter Operator + predicate: _col3 is not null (type: boolean) + Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: double), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col3 (type: double), _col0 (type: string) + Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: float) TableScan alias: t3 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((c_int = 1) and (UDFToDouble(key) = 1.0)) (type: boolean) + predicate: ((c_int = 1) and key is not null) (type: boolean) Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_float (type: float) - outputColumnNames: _col2 + expressions: key (type: string), c_float (type: float) + outputColumnNames: _col0, _col2 Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '1.0' (type: string) - sort order: + - Map-reduce partition columns: '1.0' (type: string) + key expressions: 1.0 (type: double), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: 1.0 (type: double), _col0 (type: string) Statistics: Num rows: 5 Data size: 65 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: float) Reduce Operator Tree: @@ -316,15 +319,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE + 0 _col3 (type: double), _col0 (type: string) + 1 _col3 (type: double), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col6 + Statistics: Num rows: 5 Data size: 71 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col2 + _col5) > 2.0) or ((_col1 + 1) > 2)) (type: boolean) - Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col2 + _col6) > 2.0) or ((_col1 + 1) > 2)) (type: boolean) + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -339,21 +344,23 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(key) = 1.0) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '1.0' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '1.0' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator - key expressions: '1.0' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '1.0' (type: string) - Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -361,18 +368,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '1.0' (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 144 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/join13.q.out ql/src/test/results/clientpositive/join13.q.out index 3b921b9..36819bc 100644 --- ql/src/test/results/clientpositive/join13.q.out +++ ql/src/test/results/clientpositive/join13.q.out @@ -25,12 +25,12 @@ JOIN ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -73,52 +73,59 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col2) + UDFToDouble(_col0)) is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col2 (type: string), _col1 (type: string), (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - sort order: + - Map-reduce partition columns: (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 200.0) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(key) < 200.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: UDFToDouble(key) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col2 (type: double) + sort order: + + Map-reduce partition columns: _col2 (type: double) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 UDFToDouble(_col0) (type: double) + 0 _col0 (type: double) + 1 _col2 (type: double) outputColumnNames: _col1, _col2 Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/lineage3.q.out ql/src/test/results/clientpositive/lineage3.q.out index b6b4e0b..4f05157 100644 --- ql/src/test/results/clientpositive/lineage3.q.out +++ ql/src/test/results/clientpositive/lineage3.q.out @@ -135,7 +135,7 @@ and x.ctinyint + length(c.cstring2) < 1000 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -{"version":"1.0","engine":"mr","hash":"3a12ad24b2622a8958df12d0bdc60f8a","queryText":"select x.ctinyint, x.cint, c.cbigint-100, c.cstring1\nfrom alltypesorc c\njoin (\n select a.ctinyint ctinyint, b.cint cint\n from (select * from alltypesorc a where cboolean1=false) a\n join alltypesorc b on (a.cint = b.cbigint - 224870380)\n ) x on (x.cint = c.cint)\nwhere x.ctinyint > 10\nand x.cint < 4.5\nand x.ctinyint + length(c.cstring2) < 1000","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"cint","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"(c.cbigint - UDFToLong(100))","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"(UDFToDouble(c.cint) < 4.5)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3],"expression":"((UDFToInteger(ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(c.cint = c.cint)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3],"expression":"(c.cboolean1 = false)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1,2,3],"expression":"(c.ctinyint > 10)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1,2,3],"expression":"((c.cbigint - UDFToLong(224870380)) = UDFToLong(c.cint))","edgeType":"PREDICATE"},{"sources":[4,8],"targets":[0,1,2,3],"expression":"((UDFToInteger(c.ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"x.ctinyint"},{"id":1,"vertexType":"COLUMN","vertexId":"x.cint"},{"id":2,"vertexType":"COLUMN","vertexId":"c2"},{"id":3,"vertexType":"COLUMN","vertexId":"c.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"}]} +{"version":"1.0","engine":"mr","hash":"3a12ad24b2622a8958df12d0bdc60f8a","queryText":"select x.ctinyint, x.cint, c.cbigint-100, c.cstring1\nfrom alltypesorc c\njoin (\n select a.ctinyint ctinyint, b.cint cint\n from (select * from alltypesorc a where cboolean1=false) a\n join alltypesorc b on (a.cint = b.cbigint - 224870380)\n ) x on (x.cint = c.cint)\nwhere x.ctinyint > 10\nand x.cint < 4.5\nand x.ctinyint + length(c.cstring2) < 1000","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"cint0","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"(c.cbigint - UDFToLong(100))","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"(UDFToDouble(c.cint) < 4.5)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3],"expression":"((UDFToInteger(ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1,2,3],"expression":"(c.cint = c.cint)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3],"expression":"(c.cboolean1 = false)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1,2,3],"expression":"(c.ctinyint > 10)","edgeType":"PREDICATE"},{"sources":[6,5],"targets":[0,1,2,3],"expression":"((c.cbigint - UDFToLong(224870380)) = UDFToLong(c.cint))","edgeType":"PREDICATE"},{"sources":[4,8],"targets":[0,1,2,3],"expression":"((UDFToInteger(c.ctinyint) + length(c.cstring2)) < 1000)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"x.ctinyint"},{"id":1,"vertexType":"COLUMN","vertexId":"x.cint"},{"id":2,"vertexType":"COLUMN","vertexId":"c2"},{"id":3,"vertexType":"COLUMN","vertexId":"c.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"}]} 11 -654374827 857266369 OEfPnHnIYueoup PREHOOK: query: select c1, x2, x3 from ( @@ -180,13 +180,13 @@ PREHOOK: Input: default@src1 #### A masked pattern was here #### {"version":"1.0","engine":"mr","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > UDFToDouble(300))","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[2,4],"targets":[0,1],"expression":"(UDFToDouble(a.key) = UDFToDouble((UDFToInteger(b.ctinyint) + 300)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} 311 val_311 -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select key, value from src1 where key not in (select key+18 from src1) order by key PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"expression":"key","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"_o__c0 is null","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + UDFToDouble(18)) is null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(count(*) = 0)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"true","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(key) = (UDFToDouble(src1.key) + UDFToDouble(18)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]} +{"version":"1.0","engine":"mr","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"expression":"key","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[],"targets":[0,1],"expression":"(1 = 1)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"_o__c0 is null","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) + UDFToDouble(18)) is null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"(count(*) = 0)","edgeType":"PREDICATE"},{"sources":[],"targets":[0,1],"expression":"true","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + UDFToDouble(18)))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]} PREHOOK: query: select * from src1 a where not exists (select cint from alltypesorc b diff --git ql/src/test/results/clientpositive/subquery_notin.q.out ql/src/test/results/clientpositive/subquery_notin.q.out index fd6d53b..3d5ed50 100644 --- ql/src/test/results/clientpositive/subquery_notin.q.out +++ ql/src/test/results/clientpositive/subquery_notin.q.out @@ -567,7 +567,7 @@ Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join JOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- agg, non corr explain select p_name, p_size @@ -668,12 +668,27 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -686,29 +701,26 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col1) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col1) (type: double) + sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: int) TableScan Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: - 0 UDFToDouble(_col1) (type: double) - 1 _col0 (type: double) - outputColumnNames: _col0, _col1, _col2 + 0 + 1 + outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), UDFToDouble(_col1) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -721,29 +733,40 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + key expressions: _col2 (type: double) + sort order: + + Map-reduce partition columns: _col2 (type: double) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: int) TableScan Reduce Output Operator - sort order: + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + 0 _col2 (type: double) + 1 _col0 (type: double) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -815,27 +838,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -843,7 +851,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select p_name, p_size from part where part.p_size not in diff --git ql/src/test/results/clientpositive/subquery_views.q.out ql/src/test/results/clientpositive/subquery_views.q.out index 41834a3..be10ca6 100644 --- ql/src/test/results/clientpositive/subquery_views.q.out +++ ql/src/test/results/clientpositive/subquery_views.q.out @@ -70,7 +70,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@cv2 Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product PREHOOK: query: explain select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') @@ -97,7 +97,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean) + predicate: ((value > 'val_11') and (key is null or value is null or key is null)) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -252,7 +252,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean) + predicate: ((value > 'val_11') and (key is null or value is null or key is null)) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -379,7 +379,7 @@ STAGE PLANS: ListSink Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product PREHOOK: query: select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/explainuser_1.q.out ql/src/test/results/clientpositive/tez/explainuser_1.q.out index e8a9786..1b69fdc 100644 --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -2619,79 +2619,92 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 - File Output Operator [FS_24] + Reducer 3 + File Output Operator [FS_31] compressed:false Statistics:Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Select Operator [SEL_23] + Select Operator [SEL_30] outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_34] - | condition map:[{"":"Left Semi Join 0 to 1"},{"":"Left Semi Join 0 to 2"}] - | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} + Merge Join Operator [MERGEJOIN_41] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_17] + |<-Map 5 [SIMPLE_EDGE] + | Reduce Output Operator [RS_28] | key expressions:_col0 (type: string) | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: int), _col2 (type: float) - | Select Operator [SEL_2] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_31] - | predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) - | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] - | alias:cbo_t1 - | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_19] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_13] + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_24] | keys:_col0 (type: string) | outputColumnNames:["_col0"] - | Statistics:Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_9] + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_22] | outputColumnNames:["_col0"] - | Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_32] - | predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) - | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_7] - | alias:cbo_t2 + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_39] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_21] + | alias:cbo_t3 | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_21] + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_26] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ - Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_15] - keys:_col0 (type: string) - outputColumnNames:["_col0"] - Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_11] - outputColumnNames:["_col0"] - Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_33] - predicate:key is not null (type: boolean) - Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_10] - alias:cbo_t3 - Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: float) + Merge Join Operator [MERGEJOIN_40] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_15] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: float) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_37] + | predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) + | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_13] + keys:_col0 (type: string) + outputColumnNames:["_col0"] + Statistics:Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_11] + outputColumnNames:["_col0"] + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_38] + predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) + Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_9] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select a, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by a+b desc, c asc) cbo_t1 left semi join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by q+r/10 desc, p) cbo_t2 on cbo_t1.a=p left semi join cbo_t3 on cbo_t1.a=key where (b + 1 >= 0) and (b > 0 or a >= 0) group by a, c having a > 0 and (a >=1 or c >= 1) and (a + c) >= 0 order by c, a PREHOOK: type: QUERY @@ -2700,174 +2713,190 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 - File Output Operator [FS_50] + Reducer 7 + File Output Operator [FS_58] compressed:false Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Select Operator [SEL_49] + Select Operator [SEL_57] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 5 [SIMPLE_EDGE] - Reduce Output Operator [RS_48] + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_56] key expressions:_col1 (type: bigint), _col0 (type: string) sort order:++ Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col2 (type: bigint) - Group By Operator [GBY_46] + Group By Operator [GBY_54] | aggregations:["count(VALUE._col0)"] | keys:KEY._col0 (type: string), KEY._col1 (type: bigint) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_45] + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_53] key expressions:_col0 (type: string), _col1 (type: bigint) Map-reduce partition columns:_col0 (type: string), _col1 (type: bigint) sort order:++ Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col2 (type: bigint) - Group By Operator [GBY_44] + Group By Operator [GBY_52] aggregations:["count()"] keys:_col0 (type: string), _col1 (type: bigint) outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_63] - | condition map:[{"":"Left Semi Join 0 to 1"},{"":"Left Semi Join 0 to 2"}] - | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_41] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_35] - | keys:_col0 (type: string) - | outputColumnNames:["_col0"] - | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_31] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_61] - | predicate:key is not null (type: boolean) - | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_30] - | alias:cbo_t3 - | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_37] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col1 (type: bigint) - | Select Operator [SEL_11] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_10] - | key expressions:_col3 (type: double), _col2 (type: bigint) - | sort order:-+ - | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col0 (type: string) - | Filter Operator [FIL_57] - | predicate:(((_col1 + 1) >= 0) and ((_col1 > 0) or (UDFToDouble(_col0) >= 0.0))) (type: boolean) - | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_9] - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_58] - | predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0) and ((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1))) (type: boolean) - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_62] - | outputColumnNames:["_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_8] - | | aggregations:["sum(VALUE._col0)"] - | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | | outputColumnNames:["_col0","_col1","_col2","_col3"] - | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_7] - | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | sort order:+++ - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col3 (type: bigint) - | Group By Operator [GBY_6] - | aggregations:["sum(_col1)"] - | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_2] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_59] - | predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean) - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] - | alias:cbo_t1 - | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 9 [SIMPLE_EDGE] - Reduce Output Operator [RS_39] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_33] - keys:_col0 (type: string) - outputColumnNames:["_col0"] - Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_28] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_27] - key expressions:_col1 (type: double), _col0 (type: string) - sort order:-+ + Select Operator [SEL_50] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_71] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 11 [SIMPLE_EDGE] + | Reduce Output Operator [RS_48] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_44] + | keys:_col0 (type: string) + | outputColumnNames:["_col0"] + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_42] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_68] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_41] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_46] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col2 (type: bigint) + Merge Join Operator [MERGEJOIN_70] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_37] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_33] + | keys:_col0 (type: string) + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_30] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_29] + | key expressions:_col1 (type: double), _col0 (type: string) + | sort order:-+ + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_28] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_27] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_26] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_25] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_21] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_67] + | predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) and (UDFToDouble(key) > 0.0)) (type: boolean) + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_19] + | alias:cbo_t2 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_35] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_26] - outputColumnNames:["_col0","_col1"] - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_25] - | aggregations:["sum(VALUE._col0)"] - | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_24] - key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - sort order:+++ - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col3 (type: bigint) - Group By Operator [GBY_23] - aggregations:["sum(_col1)"] - keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + value expressions:_col2 (type: bigint) + Select Operator [SEL_11] + | outputColumnNames:["_col0","_col2"] + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col3 (type: double), _col2 (type: bigint) + sort order:-+ + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string) + Filter Operator [FIL_64] + predicate:(((_col1 + 1) >= 0) and ((_col1 > 0) or (UDFToDouble(_col0) >= 0.0))) (type: boolean) + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_9] outputColumnNames:["_col0","_col1","_col2","_col3"] - Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_19] - outputColumnNames:["_col0","_col1","_col2"] - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_60] - predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean) - Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_17] - alias:cbo_t2 - Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_65] + predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0) and ((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1))) (type: boolean) + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_69] + outputColumnNames:["_col1","_col2","_col3"] + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_8] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_6] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_66] + predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean) + Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 PREHOOK: type: QUERY @@ -3496,23 +3525,23 @@ Stage-0 limit:-1 Stage-1 Reducer 4 - File Output Operator [FS_36] + File Output Operator [FS_37] compressed:false Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Merge Join Operator [MERGEJOIN_49] + Merge Join Operator [MERGEJOIN_50] | condition map:[{"":"Left Semi Join 0 to 1"}] | keys:{"1":"_col0 (type: bigint)","0":"_col2 (type: bigint)"} | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 3 [SIMPLE_EDGE] - | Reduce Output Operator [RS_31] + | Reduce Output Operator [RS_32] | key expressions:_col2 (type: bigint) | Map-reduce partition columns:_col2 (type: bigint) | sort order:+ | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE | value expressions:_col0 (type: string), _col1 (type: string) - | Filter Operator [FIL_42] + | Filter Operator [FIL_43] | predicate:_col2 is not null (type: boolean) | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE | Group By Operator [GBY_18] @@ -3532,7 +3561,7 @@ Stage-0 | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE - | Merge Join Operator [MERGEJOIN_48] + | Merge Join Operator [MERGEJOIN_49] | | condition map:[{"":"Left Semi Join 0 to 1"}] | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} | | outputColumnNames:["_col0","_col1"] @@ -3547,7 +3576,7 @@ Stage-0 | | Select Operator [SEL_2] | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - | | Filter Operator [FIL_43] + | | Filter Operator [FIL_44] | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE | | TableScan [TS_0] @@ -3566,55 +3595,55 @@ Stage-0 | Select Operator [SEL_7] | outputColumnNames:["_col0"] | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_44] + | Filter Operator [FIL_45] | predicate:(key > '8') (type: boolean) | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE | TableScan [TS_5] | alias:b | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_33] + Reduce Output Operator [RS_34] key expressions:_col0 (type: bigint) Map-reduce partition columns:_col0 (type: bigint) sort order:+ Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_29] + Group By Operator [GBY_30] keys:_col0 (type: bigint) outputColumnNames:["_col0"] Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_27] + Select Operator [SEL_28] outputColumnNames:["_col0"] Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_45] + Filter Operator [FIL_46] predicate:_col1 is not null (type: boolean) Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_47] + Select Operator [SEL_48] outputColumnNames:["_col1"] Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_26] + Group By Operator [GBY_27] | aggregations:["count(VALUE._col0)"] | keys:KEY._col0 (type: string) | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE |<-Map 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_25] + Reduce Output Operator [RS_26] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE value expressions:_col1 (type: bigint) - Group By Operator [GBY_24] + Group By Operator [GBY_25] aggregations:["count()"] keys:_col0 (type: string) outputColumnNames:["_col0","_col1"] Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_22] + Select Operator [SEL_23] outputColumnNames:["_col0"] Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_46] + Filter Operator [FIL_47] predicate:(key > '9') (type: boolean) Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_20] + TableScan [TS_21] alias:b Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -3642,17 +3671,17 @@ Stage-0 limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_22] + File Output Operator [FS_23] compressed:false Statistics:Num rows: 3 Data size: 681 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Merge Join Operator [MERGEJOIN_27] + Merge Join Operator [MERGEJOIN_28] | condition map:[{"":"Left Semi Join 0 to 1"}] | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 3 Data size: 681 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_17] + | Reduce Output Operator [RS_18] | key expressions:_col1 (type: string) | Map-reduce partition columns:_col1 (type: string) | sort order:+ @@ -3678,42 +3707,42 @@ Stage-0 | Select Operator [SEL_2] | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_25] + | Filter Operator [FIL_26] | predicate:p_name is not null (type: boolean) | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE | TableScan [TS_0] | alias:part | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 5 [SIMPLE_EDGE] - Reduce Output Operator [RS_19] + Reduce Output Operator [RS_20] key expressions:_col0 (type: string) Map-reduce partition columns:_col0 (type: string) sort order:+ Statistics:Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_15] + Group By Operator [GBY_16] keys:_col0 (type: string) outputColumnNames:["_col0"] Statistics:Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_12] + Select Operator [SEL_13] outputColumnNames:["_col0"] Statistics:Num rows: 13 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_26] + Filter Operator [FIL_27] predicate:first_value_window_0 is not null (type: boolean) Statistics:Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator [PTF_11] + PTF Operator [PTF_12] Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}] Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_10] + Select Operator [SEL_11] | outputColumnNames:["_col1","_col2","_col5"] | Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE |<-Map 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_9] + Reduce Output Operator [RS_10] key expressions:p_mfgr (type: string), p_size (type: int) Map-reduce partition columns:p_mfgr (type: string) sort order:++ Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE value expressions:p_name (type: string) - TableScan [TS_8] + TableScan [TS_9] alias:part Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE @@ -3981,116 +4010,122 @@ Stage-0 Reducer 4 File Output Operator [FS_39] compressed:false - Statistics:Num rows: 1 Data size: 146 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 15 Data size: 1966 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Select Operator [SEL_38] | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 146 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 15 Data size: 1966 Basic stats: COMPLETE Column stats: NONE |<-Reducer 3 [SIMPLE_EDGE] Reduce Output Operator [RS_37] key expressions:_col0 (type: string) sort order:+ - Statistics:Num rows: 1 Data size: 146 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 15 Data size: 1966 Basic stats: COMPLETE Column stats: NONE value expressions:_col1 (type: int) - Merge Join Operator [MERGEJOIN_49] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{} - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 1 Data size: 146 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_31] - | sort order: - | Statistics:Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col0 (type: string), _col1 (type: int) - | Filter Operator [FIL_42] - | predicate:_col2 is null (type: boolean) - | Statistics:Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE - | Merge Join Operator [MERGEJOIN_48] - | | condition map:[{"":"Left Outer Join0 to 1"}] - | | keys:{"1":"_col0 (type: double)","0":"UDFToDouble(_col1) (type: double)"} - | | outputColumnNames:["_col0","_col1","_col2"] - | | Statistics:Num rows: 1 Data size: 133 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 1 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_28] - | | key expressions:UDFToDouble(_col1) (type: double) - | | Map-reduce partition columns:UDFToDouble(_col1) (type: double) - | | sort order:+ - | | Statistics:Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - | | value expressions:_col0 (type: string), _col1 (type: int) - | | Select Operator [SEL_2] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - | | TableScan [TS_0] - | | alias:part - | | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Reducer 6 [SIMPLE_EDGE] - | Reduce Output Operator [RS_29] - | key expressions:_col0 (type: double) - | Map-reduce partition columns:_col0 (type: double) - | sort order:+ - | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_9] - | | aggregations:["avg(VALUE._col0)"] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 5 [SIMPLE_EDGE] - | Reduce Output Operator [RS_8] - | sort order: - | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - | value expressions:_col0 (type: struct) - | Group By Operator [GBY_7] - | aggregations:["avg(_col0)"] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - | Select Operator [SEL_5] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_44] - | predicate:(p_size < 10) (type: boolean) - | Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_3] - | alias:part - | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_32] - sort order: - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_24] - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Filter Operator [FIL_45] - predicate:(_col0 = 0) (type: boolean) + Select Operator [SEL_36] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 15 Data size: 1966 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_42] + predicate:_col3 is null (type: boolean) + Statistics:Num rows: 15 Data size: 1966 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_49] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: double)","0":"_col2 (type: double)"} + | outputColumnNames:["_col0","_col1","_col3"] + | Statistics:Num rows: 30 Data size: 3932 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_32] + | key expressions:_col2 (type: double) + | Map-reduce partition columns:_col2 (type: double) + | sort order:+ + | Statistics:Num rows: 28 Data size: 3575 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: int) + | Select Operator [SEL_23] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 28 Data size: 3575 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_48] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{} + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 28 Data size: 3575 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_20] + | | sort order: + | | Statistics:Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + | | value expressions:_col0 (type: string), _col1 (type: int) + | | Select Operator [SEL_2] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + | | TableScan [TS_0] + | | alias:part + | | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] + | sort order: + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | Select Operator [SEL_16] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | Filter Operator [FIL_43] + | predicate:(_col0 = 0) (type: boolean) + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_15] + | aggregations:["count()"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_10] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_44] + | predicate:_col0 is null (type: boolean) + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_9] + | | aggregations:["avg(VALUE._col0)"] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 5 [SIMPLE_EDGE] + | Reduce Output Operator [RS_8] + | sort order: + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | value expressions:_col0 (type: struct) + | Group By Operator [GBY_7] + | aggregations:["avg(_col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | Select Operator [SEL_5] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_45] + | predicate:(p_size < 10) (type: boolean) + | Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_3] + | alias:part + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_33] + key expressions:_col0 (type: double) + Map-reduce partition columns:_col0 (type: double) + sort order:+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_23] - aggregations:["count()"] - outputColumnNames:["_col0"] - Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_18] - Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_46] - predicate:_col0 is null (type: boolean) - Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_17] - | aggregations:["avg(VALUE._col0)"] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 7 [SIMPLE_EDGE] - Reduce Output Operator [RS_16] - sort order: - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - value expressions:_col0 (type: struct) - Group By Operator [GBY_15] - aggregations:["avg(_col0)"] - outputColumnNames:["_col0"] - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_13] - outputColumnNames:["_col0"] - Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_47] - predicate:(p_size < 10) (type: boolean) - Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_11] - alias:part - Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_30] + | aggregations:["avg(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_29] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col0 (type: struct) + Group By Operator [GBY_28] + aggregations:["avg(_col0)"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_26] + outputColumnNames:["_col0"] + Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_47] + predicate:(p_size < 10) (type: boolean) + Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_24] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select b.p_mfgr, min(p_retailprice) from part b diff --git ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_2.q.out ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_2.q.out index 8f43b26..f4616db 100644 --- ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_2.q.out +++ ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_2.q.out @@ -34,8 +34,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -44,78 +45,102 @@ STAGE PLANS: alias: a Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint < 100) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToInteger(_col1) (type: int) - sort order: + - Map-reduce partition columns: UDFToInteger(_col1) (type: int) + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Map 4 + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToInteger(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToInteger(_col0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToInteger(key) + 0) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: (UDFToInteger(key) + 0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (UDFToInteger(_col0) + 0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 UDFToInteger(_col1) (type: int) - 1 UDFToInteger(_col0) (type: int) - 2 (UDFToInteger(_col0) + 0) (type: int) + 0 _col12 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -220,8 +245,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -230,82 +256,108 @@ STAGE PLANS: alias: a Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint < 100) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToInteger(_col1) (type: int) - sort order: + - Map-reduce partition columns: UDFToInteger(_col1) (type: int) + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Map 4 + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToInteger(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToInteger(_col0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToInteger(key) + 0) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: (UDFToInteger(key) + 0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (UDFToInteger(_col0) + 0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 KEY.reducesinkkey0 (type: int) 1 KEY.reducesinkkey0 (type: int) - 2 KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 input vertices: - 1 Map 4 - 2 Map 5 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + 1 Map 5 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 6 + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -408,8 +460,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -418,82 +471,108 @@ STAGE PLANS: alias: a Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint < 100) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToInteger(_col1) (type: int) - sort order: + - Map-reduce partition columns: UDFToInteger(_col1) (type: int) + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Map 4 + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToInteger(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToInteger(_col0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToInteger(key) + 0) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: (UDFToInteger(key) + 0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (UDFToInteger(_col0) + 0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 KEY.reducesinkkey0 (type: int) 1 KEY.reducesinkkey0 (type: int) - 2 KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 input vertices: - 1 Map 4 - 2 Map 5 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + 1 Map 5 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 6 + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_2.q.out ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_2.q.out index e814103..f27e1be 100644 --- ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_2.q.out +++ ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_2.q.out @@ -34,8 +34,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -44,78 +45,102 @@ STAGE PLANS: alias: a Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint < 100) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToInteger(_col1) (type: int) - sort order: + - Map-reduce partition columns: UDFToInteger(_col1) (type: int) + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Map 4 + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToInteger(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToInteger(_col0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToInteger(key) + 0) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: (UDFToInteger(key) + 0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (UDFToInteger(_col0) + 0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 UDFToInteger(_col1) (type: int) - 1 UDFToInteger(_col0) (type: int) - 2 (UDFToInteger(_col0) + 0) (type: int) + 0 _col12 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -220,8 +245,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -230,84 +256,111 @@ STAGE PLANS: alias: a Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint < 100) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToInteger(_col1) (type: int) - sort order: + - Map-reduce partition columns: UDFToInteger(_col1) (type: int) + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToInteger(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToInteger(_col0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToInteger(key) + 0) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: (UDFToInteger(key) + 0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (UDFToInteger(_col0) + 0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 KEY.reducesinkkey0 (type: int) 1 KEY.reducesinkkey0 (type: int) - 2 KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 input vertices: - 1 Map 4 - 2 Map 5 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + 1 Map 5 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 6 + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -411,8 +464,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -421,84 +475,111 @@ STAGE PLANS: alias: a Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((csmallint < 100) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + predicate: (csmallint < 100) (type: boolean) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToInteger(_col1) (type: int) - sort order: + - Map-reduce partition columns: UDFToInteger(_col1) (type: int) + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToInteger(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToInteger(_col0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToInteger(key) + 0) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Select Operator + expressions: (UDFToInteger(key) + 0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (UDFToInteger(_col0) + 0) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: (UDFToInteger(_col0) + 0) (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 KEY.reducesinkkey0 (type: int) 1 KEY.reducesinkkey0 (type: int) - 2 KEY.reducesinkkey0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 input vertices: - 1 Map 4 - 2 Map 5 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + 1 Map 5 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is not null (type: boolean) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 1126 Data size: 242179 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 6 + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1238 Data size: 266396 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat