diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java index 1814550..cfc83ee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java @@ -20,9 +20,11 @@ import java.io.Serializable; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -30,10 +32,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; /** @@ -73,37 +79,6 @@ public ConstantPropagateProcCtx(ConstantPropagateOption option) { } /** - * Resolve a ColumnInfo based on given RowResolver. - * - * @param ci - * @param rr - * @param parentRR - * @return - * @throws SemanticException - */ - private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { - // Resolve new ColumnInfo from - String alias = ci.getAlias(); - if (alias == null) { - alias = ci.getInternalName(); - } - String tblAlias = ci.getTabAlias(); - ColumnInfo rci = rs.getColumnInfo(tblAlias, alias); - if (rci == null && rs.getTableNames().size() == 1 && - parentRS.getTableNames().size() == 1) { - rci = rs.getColumnInfo(rs.getTableNames().iterator().next(), - alias); - } - if (rci == null) { - return null; - } - LOG.debug("Resolved " - + ci.getTabAlias() + "." + ci.getAlias() + " as " - + rci.getTabAlias() + "." + rci.getAlias() + " with rs: " + rs); - return rci; - } - - /** * Get propagated constant map from parents. * * Traverse all parents of current operator, if there is propagated constant (determined by @@ -115,8 +90,8 @@ private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { * @return map of ColumnInfo to ExprNodeDesc. The values of that map must be either * ExprNodeConstantDesc or ExprNodeNullDesc. */ - public Map getPropagatedConstants( - Operator op) { + public Map getPropagatedConstants(Operator op) { + // this map should map columnInfo to ExprConstantNodeDesc Map constants = new HashMap(); if (op.getSchema() == null) { return constants; @@ -128,82 +103,134 @@ private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) { return constants; } - if (op instanceof UnionOperator) { - String alias = rs.getSignature().get(0).getTabAlias(); - // find intersection - Map intersection = null; - for (Operator parent : op.getParentOperators()) { - Map unionConst = opToConstantExprs.get(parent); - LOG.debug("Constant of op " + parent.getOperatorId() + " " + unionConst); - if (intersection == null) { - intersection = new HashMap(); - for (Entry e : unionConst.entrySet()) { - ColumnInfo ci = new ColumnInfo(e.getKey()); - ci.setTabAlias(alias); - intersection.put(ci, e.getValue()); + // A previous solution is based on tableAlias and colAlias, which is + // unsafe, esp. when CBO generates derived table names. see HIVE-13602. + // For correctness purpose, we only trust colExpMap. + // We assume that CBO can do the constantPropagation before this function is + // called to help improve the performance. + // UnionOperator, LimitOperator and FilterOperator are special, they should already be + // column-position aligned. + + List> parentsToConstant = new ArrayList<>(); + boolean areAllParentsContainConstant = true; + boolean noParentsContainConstant = true; + for (Operator parent : op.getParentOperators()) { + Map constMap = opToConstantExprs.get(parent); + if (constMap == null) { + LOG.debug("Constant of Op " + parent.getOperatorId() + " is not found"); + areAllParentsContainConstant = false; + } else { + noParentsContainConstant = false; + Map map = new HashMap<>(); + for (Entry entry : constMap.entrySet()) { + map.put(parent.getSchema().getPosition(entry.getKey().getInternalName()), + entry.getValue()); + } + parentsToConstant.add(map); + LOG.debug("Constant of Op " + parent.getOperatorId() + " " + constMap); + } + } + if (noParentsContainConstant) { + return constants; + } + + ArrayList signature = op.getSchema().getSignature(); + if (op instanceof LimitOperator || op instanceof FilterOperator) { + // there should be only one parent. + if (op.getParentOperators().size() == 1) { + Map parentToConstant = parentsToConstant.get(0); + for (int index = 0; index < signature.size(); index++) { + if (parentToConstant.containsKey(index)) { + constants.put(signature.get(index), parentToConstant.get(index)); } - } else { - Iterator> itr = intersection.entrySet().iterator(); - while (itr.hasNext()) { - Entry e = itr.next(); - boolean found = false; - for (Entry f : opToConstantExprs.get(parent).entrySet()) { - if (e.getKey().getInternalName().equals(f.getKey().getInternalName())) { - if (e.getValue().isSame(f.getValue())) { - found = true; - } + } + } + } else if (op instanceof UnionOperator && areAllParentsContainConstant) { + for (int index = 0; index < signature.size(); index++) { + ExprNodeDesc constant = null; + for (Map parentToConstant : parentsToConstant) { + if (!parentToConstant.containsKey(index)) { + // if this parent does not contain a constant at this position, we + // continue to look at other positions. + constant = null; + break; + } else { + if (constant == null) { + constant = parentToConstant.get(index); + } else { + // compare if they are the same constant. + ExprNodeDesc nextConstant = parentToConstant.get(index); + if (!nextConstant.isSame(constant)) { + // they are not the same constant. for example, union all of 1 + // and 2. + constant = null; break; } } - if (!found) { - itr.remove(); - } } } - if (intersection.isEmpty()) { - return intersection; + // we have checked all the parents for the "index" position. + if (constant != null) { + constants.put(signature.get(index), constant); } } - LOG.debug("Propagated union constants:" + intersection); - return intersection; - } - - for (Operator parent : op.getParentOperators()) { - Map c = opToConstantExprs.get(parent); - for (Entry e : c.entrySet()) { - ColumnInfo ci = e.getKey(); - ExprNodeDesc constant = e.getValue(); - boolean resolved = false; - ColumnInfo rci = resolve(ci, rs, parent.getSchema()); - - if (rci != null) { - constants.put(rci, constant); - resolved = true; + } else if (op instanceof JoinOperator) { + JoinOperator joinOp = (JoinOperator) op; + Iterator>> itr = joinOp.getConf().getExprs().entrySet() + .iterator(); + while (itr.hasNext()) { + Entry> e = itr.next(); + int tag = e.getKey(); + Operator parent = op.getParentOperators().get(tag); + List exprs = e.getValue(); + if (exprs == null) { + continue; } - if (!resolved && - op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) { - for (Entry entry : op.getColumnExprMap().entrySet()) { - if (entry.getValue().isSame(constant)) { - ColumnInfo rsColumnInfo = rs.getColumnInfo(entry.getKey()); - if (rsColumnInfo == null) { - continue; + for (ExprNodeDesc expr : exprs) { + // we are only interested in ExprNodeColumnDesc + if (expr instanceof ExprNodeColumnDesc) { + String parentColName = ((ExprNodeColumnDesc) expr).getColumn(); + // find this parentColName in its parent's rs + int parentPos = parent.getSchema().getPosition(parentColName); + if (parentsToConstant.get(tag).containsKey(parentPos)) { + // this position in parent is a constant + // reverse look up colExprMap to find the childColName + if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) { + for (Entry entry : op.getColumnExprMap().entrySet()) { + if (entry.getValue().isSame(expr)) { + // now propagate the constant from the parent to the child + constants.put(signature.get(op.getSchema().getPosition(entry.getKey())), + parentsToConstant.get(tag).get(parentPos)); + } + } } - constants.put(rsColumnInfo, constant); - resolved = true; } } } - - if (!resolved) { - LOG.debug("Can't resolve " + ci.getTabAlias() + "." + ci.getAlias() + - "(" + ci.getInternalName() + ") from rs:" + rs); + } + } else { + // there should be only one parent. + if (op.getParentOperators().size() == 1) { + Operator parent = op.getParentOperators().get(0); + if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) { + for (Entry entry : op.getColumnExprMap().entrySet()) { + ExprNodeDesc expr = entry.getValue(); + if (expr instanceof ExprNodeColumnDesc) { + String parentColName = ((ExprNodeColumnDesc) expr).getColumn(); + // find this parentColName in its parent's rs + int parentPos = parent.getSchema().getPosition(parentColName); + if (parentsToConstant.get(0).containsKey(parentPos)) { + // this position in parent is a constant + // now propagate the constant from the parent to the child + constants.put(signature.get(op.getSchema().getPosition(entry.getKey())), + parentsToConstant.get(0).get(parentPos)); + } + } + } } } } - - LOG.debug("Offerring constants " + constants.keySet() - + " to operator " + op.toString()); - + LOG.debug("Offerring constants " + constants.keySet() + " to operator " + op.toString()); return constants; } diff --git a/ql/src/test/queries/clientpositive/constant_prop_1.q b/ql/src/test/queries/clientpositive/constant_prop_1.q new file mode 100644 index 0000000..9a0a17c --- /dev/null +++ b/ql/src/test/queries/clientpositive/constant_prop_1.q @@ -0,0 +1,51 @@ +set hive.cbo.enable=false; + + +explain +select 1 as a from src +union all +select 1 as a from src limit 1; + +explain +select a, key, value from +( +select 1 as a from src +union all +select 1 as a from src limit 1 +)sub join src b where value='12345'; + + +explain +select 1 as a from src +union all +select 2 as a from src limit 1; + +explain +select a, key, value from +( +select 1 as a from src +union all +select 2 as a from src limit 1 +)sub join src b where value='12345'; + +explain +select a.key, b.value from src a join src b where a.key = '238' and b.value = '234'; + +explain +select a.key, b.value from src a join src b on a.key=b.key where b.value = '234'; + +create table t ( +a int, +b int, +c int, +d int, +e int +); + +explain +select a2 as a3 from +(select a1 as a2, c1 as c2 from +(select a as a1, b as b1, c as c1 from t where a=1 and b=2 and c=3)sub1)sub2; + + + diff --git a/ql/src/test/results/clientpositive/constant_prop_1.q.out b/ql/src/test/results/clientpositive/constant_prop_1.q.out new file mode 100644 index 0000000..42aed3c --- /dev/null +++ b/ql/src/test/results/clientpositive/constant_prop_1.q.out @@ -0,0 +1,547 @@ +PREHOOK: query: explain +select 1 as a from src +union all +select 1 as a from src limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select 1 as a from src +union all +select 1 as a from src limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 1 Data size: 5812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src + Statistics: Num rows: 1 Data size: 5812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[13][tables = [sub, b]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select a, key, value from +( +select 1 as a from src +union all +select 1 as a from src limit 1 +)sub join src b where value='12345' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a, key, value from +( +select 1 as a from src +union all +select 1 as a from src limit 1 +)sub join src b where value='12345' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 1 Data size: 5812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + TableScan + alias: src + Statistics: Num rows: 1 Data size: 5812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value = '12345') (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 29 Data size: 3138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int), _col1 (type: string), '12345' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 3138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 3138 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select 1 as a from src +union all +select 2 as a from src limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select 1 as a from src +union all +select 2 as a from src limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 1 Data size: 5812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src + Statistics: Num rows: 1 Data size: 5812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[13][tables = [sub, b]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select a, key, value from +( +select 1 as a from src +union all +select 2 as a from src limit 1 +)sub join src b where value='12345' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a, key, value from +( +select 1 as a from src +union all +select 2 as a from src limit 1 +)sub join src b where value='12345' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 1 Data size: 5812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int) + TableScan + alias: src + Statistics: Num rows: 1 Data size: 5812 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value = '12345') (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 3138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12345' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 29 Data size: 3138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 3138 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[4][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, b.value from src a join src b where a.key = '238' and b.value = '234' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.value from src a join src b where a.key = '238' and b.value = '234' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1937 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 968 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 968 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 1937 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value = '234') (type: boolean) + Statistics: Num rows: 968 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 968 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1064 Data size: 3194 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '238' (type: string), '234' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1064 Data size: 3194 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1064 Data size: 3194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.value from src a join src b on a.key=b.key where b.value = '234' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.value from src a join src b on a.key=b.key where b.value = '234' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 56 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (value = '234')) (type: boolean) + Statistics: Num rows: 28 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 28 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), '234' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table t ( +a int, +b int, +c int, +d int, +e int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t ( +a int, +b int, +c int, +d int, +e int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: explain +select a2 as a3 from +(select a1 as a2, c1 as c2 from +(select a as a1, b as b1, c as c1 from t where a=1 and b=2 and c=3)sub1)sub2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a2 as a3 from +(select a1 as a2, c1 as c2 from +(select a as a1, b as b1, c as c1 from t where a=1 and b=2 and c=3)sub1)sub2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((a = 1) and (b = 2) and (c = 3)) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +