diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 12fcd6a..150cc5a 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -53,6 +53,8 @@ minimr.query.files=auto_sortmerge_join_16.q,\
 minitez.query.files.shared=alter_merge_2_orc.q,\
   alter_merge_orc.q,\
   alter_merge_stats_orc.q,\
+  annotate_stats_join.q,\
+  annotate_stats_join_pkfk.q,\
   auto_join0.q,\
   auto_join1.q,\
   bucket2.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 8bba7b6..7296695 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -18,8 +18,12 @@
 package org.apache.hadoop.hive.ql.optimizer.stats.annotation;
 
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.lang.reflect.Field;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -53,6 +57,7 @@
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
@@ -72,13 +77,8 @@
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 
-import java.lang.reflect.Field;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.Stack;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 
 public class StatsRulesProcFactory {
 
@@ -1053,7 +1053,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       // statistics object that is combination of statistics from all
       // relations involved in JOIN
       Statistics stats = new Statistics();
-      Map rowCountParents = new HashMap();
+      Map rowCountParents = Maps.newHashMap();
       List distinctVals = Lists.newArrayList();
       int numParent = parents.size();
       Map joinedColStats = Maps.newHashMap();
@@ -1072,20 +1072,30 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       // get the join keys from parent ReduceSink operators
       for (int pos = 0; pos < parents.size(); pos++) {
         ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
-
+        ReduceSinkDesc rsConf = parent.getConf();
         Statistics parentStats = parent.getStatistics();
         keyExprs = parent.getConf().getOutputKeyColumnNames();
 
         // Parent RS may have column statistics from multiple parents.
         // Populate table alias to row count map, this will be used later to
         // scale down/up column statistics based on new row count
-        // NOTE: JOIN with UNION as parent of RS will not have table alias
-        // propagated properly. UNION operator does not propagate the table
-        // alias of subqueries properly to expression nodes. Hence union20.q
-        // will have wrong number of rows.
         Set tableAliases = StatsUtils.getAllTableAlias(parent.getColumnExprMap());
-        for (String tabAlias : tableAliases) {
-          rowCountParents.put(tabAlias, parentStats.getNumRows());
+        boolean allNulls = true;
+        for (String alias : tableAliases) {
+          if (alias != null) {
+            allNulls = false;
+          }
+        }
+
+        // if all table aliases on the current side of the join are null, then use the tag
+        // number as the table alias
+        String tag = String.valueOf(rsConf.getTag());
+        if (allNulls) {
+          rowCountParents.put(tag, parentStats.getNumRows());
+        } else {
+          for (String alias : tableAliases) {
+            rowCountParents.put(alias, parentStats.getNumRows());
+          }
         }
         rowCounts.add(parentStats.getNumRows());
 
@@ -1094,12 +1104,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
         // TODO: expressions in join condition will be ignored. assign
         // internal name for expressions and estimate column statistics for expression.
         List fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keyExprs,
-            parent.getColumnExprMap());
+            parent.getColumnExprMap(), tag);
         joinKeys.put(pos, fqCols);
 
         // get column statistics for all output columns
         for (ColStatistics cs : parentStats.getColumnStats()) {
-          joinedColStats.put(cs.getFullyQualifiedColName(), cs);
+
+          // if the table alias does not exist, use the tag of the reduce sink operator
+          if (cs.getTableAlias() == null) {
+            cs.setTableAlias(tag);
+          }
+          joinedColStats.put(StatsUtils.getFullyQualifiedColumnName(cs.getTableAlias(),
+              cs.getColumnName()), cs);
         }
 
         // since new statistics is derived from all relations involved in
@@ -1155,21 +1171,31 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       Map colExprMap = jop.getColumnExprMap();
       RowSchema rs = jop.getSchema();
       List outColStats = Lists.newArrayList();
-      Map outInTabAlias = new HashMap();
+      Map reversedExpr = jop.getConf().getReversedExprs();
+      Map inOutTabAlias = Maps.newHashMap();
       for (ColumnInfo ci : rs.getSignature()) {
        String key = ci.getInternalName();
        ExprNodeDesc end = colExprMap.get(key);
        if (end instanceof ExprNodeColumnDesc) {
          String colName = ((ExprNodeColumnDesc) end).getColumn();
          String tabAlias = ((ExprNodeColumnDesc) end).getTabAlias();
+
+          // if the expression node from the output row schema does not have a table alias,
+          // get the table alias from the tag-to-alias map
+          if (tabAlias == null || tabAlias.isEmpty()) {
+            tabAlias = String.valueOf(reversedExpr.get(key));
+          }
+
+          // the input table alias to output table alias map is used while updating stats
+          // after the join. The output column statistics carry the output table alias,
+          // whereas the column stats that are received as input from the reduce sink
+          // operator still carry the input table alias.
+          inOutTabAlias.put(tabAlias, ci.getTabAlias());
          String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
          ColStatistics cs = joinedColStats.get(fqColName);
-          String outColName = key;
-          String outTabAlias = ci.getTabAlias();
-          outInTabAlias.put(outTabAlias, tabAlias);
          if (cs != null) {
-            cs.setColumnName(outColName);
-            cs.setTableAlias(outTabAlias);
+            cs.setColumnName(key);
+            cs.setTableAlias(tabAlias);
          }
          outColStats.add(cs);
        }
@@ -1178,7 +1204,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       // update join statistics
       stats.setColumnStats(outColStats);
       long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom);
-      updateStatsForJoinType(stats, newRowCount, jop, rowCountParents,outInTabAlias);
+      updateStatsForJoinType(stats, newRowCount, jop, rowCountParents, inOutTabAlias);
       jop.setStatistics(stats);
 
       if (isDebugEnabled) {
@@ -1208,8 +1234,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       }
 
       long maxDataSize = parentSizes.get(maxRowIdx);
-      long newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
-      long newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
+      long newNumRows = StatsUtils
+          .safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
+      long newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)),
+          joinFactor);
       Statistics wcStats = new Statistics();
       wcStats.setNumRows(newNumRows);
       wcStats.setDataSize(newDataSize);
@@ -1366,7 +1394,7 @@ private float getSelectivityComplexTree(Operator op) {
         ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
         List keys = rsOp.getConf().getOutputKeyColumnNames();
         List fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys,
-            rsOp.getColumnExprMap());
+            rsOp.getColumnExprMap(), null);
         if (fqCols.size() == 1) {
           String joinCol = fqCols.get(0);
           if (rsOp.getStatistics() != null) {
@@ -1397,7 +1425,7 @@ private float getSelectivityComplexTree(Operator op) {
         ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
         List keys = rsOp.getConf().getOutputKeyColumnNames();
         List fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys,
-            rsOp.getColumnExprMap());
+            rsOp.getColumnExprMap(), null);
         if (fqCols.size() == 1) {
           String joinCol = fqCols.get(0);
           if (rsOp.getStatistics() != null) {
@@ -1429,8 +1457,7 @@ private Long getEasedOutDenominator(List distinctVals) {
 
     private void updateStatsForJoinType(Statistics stats, long newNumRows,
         CommonJoinOperator jop,
-        Map rowCountParents,
-        Map outInTabAlias) {
+        Map rowCountParents, Map inOutTabAlias) {
 
       if (newNumRows < 0) {
         LOG.info("STATS-" + jop.toString() + ": Overflow in number of rows."
@@ -1447,7 +1474,7 @@ private void updateStatsForJoinType(Statistics stats, long newNumRows,
       // and stats for columns from 2nd parent should be scaled down by 200x
       List colStats = stats.getColumnStats();
       for (ColStatistics cs : colStats) {
-        long oldRowCount = rowCountParents.get(outInTabAlias.get(cs.getTableAlias()));
+        long oldRowCount = rowCountParents.get(cs.getTableAlias());
        double ratio = (double) newNumRows / (double) oldRowCount;
        long oldDV = cs.getCountDistint();
        long newDV = oldDV;
@@ -1463,6 +1490,11 @@ private void updateStatsForJoinType(Statistics stats, long newNumRows,
        // TODO: HIVE-5579 will handle different join types
        cs.setNumNulls(0);
        cs.setCountDistint(newDV);
+
+        // update the table alias to the output table alias
+        String inTabAlias = cs.getTableAlias();
+        String outTabAlias = inOutTabAlias.get(inTabAlias);
+        cs.setTableAlias(outTabAlias);
       }
       stats.setColumnStats(colStats);
       long newDataSize = StatsUtils
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 30f63a2..08ae76c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -18,10 +18,14 @@
 package org.apache.hadoop.hive.ql.stats;
 
-import com.google.common.base.Joiner;
-import com.google.common.collect.Lists;
-import com.google.common.math.DoubleMath;
-import com.google.common.math.LongMath;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -86,14 +90,9 @@
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.tez.mapreduce.hadoop.MRJobConfig;
 
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import com.google.common.math.LongMath;
 
 public class StatsUtils {
 
@@ -1348,10 +1347,12 @@ private static String getFullyQualifiedName(String... names) {
    *          - output key names
    * @param map
    *          - column expression map
+   * @param tableAlias
+   *          - use the provided table alias if the table alias cannot be determined
    * @return list of fully qualified names
    */
   public static List getFullyQualifedReducerKeyNames(List keyExprs,
-      Map map) {
+      Map map, String tableAlias) {
     List result = Lists.newArrayList();
     if (keyExprs != null) {
       for (String key : keyExprs) {
@@ -1367,14 +1368,15 @@ private static String getFullyQualifiedName(String... names) {
         }
         if (end instanceof ExprNodeColumnDesc) {
           ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
-          String tabAlias = encd.getTabAlias();
+          String tabAlias = encd.getTabAlias() == null ? tableAlias : encd.getTabAlias();
           result.add(getFullyQualifiedColumnName(tabAlias, colName));
         } else if (end instanceof ExprNodeGenericFuncDesc) {
           ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end;
           String tabAlias = "";
           for (ExprNodeDesc childEnd : enf.getChildren()) {
             if (childEnd instanceof ExprNodeColumnDesc) {
-              tabAlias = ((ExprNodeColumnDesc) childEnd).getTabAlias();
+              tabAlias = ((ExprNodeColumnDesc) childEnd).getTabAlias() == null ? tableAlias :
+                  ((ExprNodeColumnDesc) childEnd).getTabAlias();
               break;
             }
           }
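The core of the change above is an aliasing fallback: when a parent ReduceSink's column expressions carry no table alias (for example when the join input comes from a subquery or UNION), the ReduceSink tag is used as a stand-in alias, so fully qualified column names stay distinct per join side and the alias-keyed row-count and column-stats maps remain consistent. Below is a minimal, self-contained sketch of that idea only; the class and method names are illustrative and are not part of the patch or of Hive's API.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Illustrative sketch only; not part of the patch. Shows the "fall back to the
// ReduceSink tag when no table alias is available" idea used by the join stats rule.
public class AliasFallbackSketch {

  // Qualifies each output key column with its table alias, or with the RS tag
  // when the alias was not propagated (null).
  static List<String> qualifyKeys(Map<String, String> keyToAlias, int rsTag) {
    List<String> fqCols = new ArrayList<String>();
    for (Map.Entry<String, String> e : keyToAlias.entrySet()) {
      String alias = (e.getValue() == null) ? String.valueOf(rsTag) : e.getValue();
      fqCols.add(alias + "." + e.getKey());
    }
    return fqCols;
  }

  public static void main(String[] args) {
    Map<String, String> keys = new LinkedHashMap<String, String>();
    keys.put("_col1", null);   // alias lost (e.g. subquery/UNION input)
    keys.put("_col0", "e");    // alias known
    // Tag 0 identifies the first parent ReduceSink of the join.
    System.out.println(qualifyKeys(keys, 0));   // prints [0._col1, e._col0]
  }
}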
diff --git ql/src/test/results/clientpositive/tez/annotate_stats_join.q.out ql/src/test/results/clientpositive/tez/annotate_stats_join.q.out
new file mode 100644
index 0000000..bdcce14
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/annotate_stats_join.q.out
@@ -0,0 +1,751 @@
+PREHOOK: query: create table if not exists emp (
+  lastname string,
+  deptid int,
+  locid int
+) row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp
+POSTHOOK: query: create table if not exists emp (
+  lastname string,
+  deptid int,
+  locid int
+) row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp
+PREHOOK: query: create table if not exists dept (
+  deptid int,
+  deptname string
+) row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dept
+POSTHOOK: query: create table if not exists dept (
+  deptid int,
+  deptname string
+) row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dept
+PREHOOK: query: create table if not exists loc (
+  state string,
+  locid int,
+  zip bigint,
+  year int
+) row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@loc
+POSTHOOK: query: create table if not exists loc (
+  state string,
+  locid int,
+  zip bigint,
+  year int
+) row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@loc
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@emp
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@emp
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@dept
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@dept
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@loc
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@loc
+PREHOOK: query: analyze table emp compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp
+PREHOOK: Output: default@emp
+POSTHOOK: query: analyze table emp compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp
+POSTHOOK: Output: default@emp
+PREHOOK: query: analyze table dept compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dept
+PREHOOK: Output: default@dept
+POSTHOOK: query: analyze table dept compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dept
+POSTHOOK: Output: default@dept
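For reference while reading the plans that follow: the expected row counts quoted in the test comments (for example "(48*6)/max(3,7) = 41") divide the product of the parent row counts by, for each join attribute, the product of the largest (n-1) distinct-value counts across the n joined relations. The sketch below reproduces only that arithmetic; it is illustrative, not part of the patch, and not Hive's implementation.

import java.util.Arrays;

// Illustrative sketch only: reproduces the row-count arithmetic used in the
// test comments of annotate_stats_join.q; it is not the Hive implementation.
public class JoinEstimateSketch {

  // rowCounts: rows of each joined relation; ndvsPerKey: for each join attribute,
  // the distinct-value counts of that key on every side of the join.
  static long estimateRows(long[] rowCounts, long[][] ndvsPerKey) {
    long numerator = 1;
    for (long rc : rowCounts) {
      numerator *= rc;
    }
    long denominator = 1;
    int n = rowCounts.length;
    for (long[] ndvs : ndvsPerKey) {
      long[] sorted = ndvs.clone();
      Arrays.sort(sorted);
      // multiply the largest (n - 1) NDVs of this join attribute
      for (int i = 0; i < n - 1 && i < sorted.length; i++) {
        denominator *= sorted[sorted.length - 1 - i];
      }
    }
    return numerator / denominator;
  }

  public static void main(String[] args) {
    // emp (48 rows) join dept (6 rows) on deptid, NDVs 3 and 7: (48*6)/7 = 41
    System.out.println(estimateRows(new long[] {48, 6}, new long[][] {{3, 7}}));
    // emp join dept join emp on deptid, NDVs 3, 7, 3: (48*6*48)/(7*3) = 658
    System.out.println(estimateRows(new long[] {48, 6, 48}, new long[][] {{3, 7, 3}}));
  }
}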
+PREHOOK: query: analyze table loc compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc +PREHOOK: Output: default@loc +POSTHOOK: query: analyze table loc compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc +PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp +#### A masked pattern was here #### +POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp +#### A masked pattern was here #### +PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@dept +#### A masked pattern was here #### +POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept +#### A masked pattern was here #### +PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc +#### A masked pattern was here #### +PREHOOK: query: -- number of rows +-- emp - 48 +-- dept - 6 +-- loc - 8 + +-- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than number of rows) +-- emp.deptid - 3 +-- emp.lastname - 6 +-- emp.locid - 7 +-- dept.deptid - 7 +-- dept.deptname - 6 +-- loc.locid - 7 +-- loc.state - 6 + +-- 2 relations, 1 attribute +-- Expected output rows: (48*6)/max(3,7) = 41 +explain select * from emp e join dept d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- number of rows +-- emp - 48 +-- dept - 6 +-- loc - 8 + +-- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than number of rows) +-- emp.deptid - 3 +-- emp.lastname - 6 +-- emp.locid - 7 +-- dept.deptid - 7 +-- dept.deptname - 6 +-- loc.locid - 7 +-- loc.state - 6 + +-- 2 relations, 1 attribute +-- Expected output rows: (48*6)/max(3,7) = 41 +explain select * from emp e join dept d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column 
stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 2 relations, 2 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname +PREHOOK: type: QUERY +POSTHOOK: query: -- 2 relations, 2 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + 
Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col0 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col0 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 2 relations, 3 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- 2 relations, 3 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string), _col0 (type: string) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col0 (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col0 (type: string), _col0 (type: string) + 1 _col0 (type: int), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 3 relations, 1 attribute +-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658 +explain select * from emp e join 
dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 relations, 1 attribute +-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658 +explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + 2 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 +explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) +PREHOOK: type: QUERY +POSTHOOK: query: -- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 +explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: locid is not null (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: bigint), _col3 (type: int) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + 2 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 3 relations and 2 attribute +-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1 +explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 relations and 2 attribute +-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1 +explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: lastname (type: string), deptid (type: int), locid (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col0 (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptid (type: int), deptname (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Map 4 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid is not null and state is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int), _col0 (type: string) + sort order: ++ + Map-reduce partition 
columns: _col1 (type: int), _col0 (type: string) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: int) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col1 (type: int), _col0 (type: string) + 1 _col0 (type: int), _col1 (type: string) + 2 _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/tez/annotate_stats_join_pkfk.q.out ql/src/test/results/clientpositive/tez/annotate_stats_join_pkfk.q.out new file mode 100644 index 0000000..8e90f3a --- /dev/null +++ ql/src/test/results/clientpositive/tez/annotate_stats_join_pkfk.q.out @@ -0,0 +1,1166 @@ +PREHOOK: query: drop table store_sales +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table store_sales +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table store +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table store +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table customer_address +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table customer_address +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- s_store_sk is PK, ss_store_sk is FK +-- ca_address_sk is PK, ss_addr_sk is FK + +create table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: -- s_store_sk is PK, ss_store_sk is FK +-- ca_address_sk is PK, ss_addr_sk is FK + +create table store_sales +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost float, + ss_list_price float, + ss_sales_price float, + ss_ext_discount_amt float, + ss_ext_sales_price float, + ss_ext_wholesale_cost float, + ss_ext_list_price float, + ss_ext_tax float, + ss_coupon_amt float, + ss_net_paid float, + ss_net_paid_inc_tax float, + ss_net_profit float +) +row format delimited fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: create table store +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + 
s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset float, + s_tax_precentage float +) +row format delimited fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store +POSTHOOK: query: create table store +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset float, + s_tax_precentage float +) +row format delimited fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store +PREHOOK: query: create table customer_address +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset float, + ca_location_type string +) +row format delimited fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_address +POSTHOOK: query: create table customer_address +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset float, + ca_location_type string +) +row format delimited fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_address +PREHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@store +POSTHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@store +PREHOOK: query: load data local inpath '../../data/files/store_sales.txt' overwrite into table store_sales +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@store_sales +POSTHOOK: query: load data local inpath '../../data/files/store_sales.txt' overwrite into table store_sales +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@store_sales +PREHOOK: query: load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@customer_address +POSTHOOK: query: load data local inpath '../../data/files/customer_address.txt' 
overwrite into table customer_address +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@customer_address +PREHOOK: query: analyze table store compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@store +PREHOOK: Output: default@store +POSTHOOK: query: analyze table store compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store +POSTHOOK: Output: default@store +PREHOOK: query: analyze table store compute statistics for columns s_store_sk, s_floor_space +PREHOOK: type: QUERY +PREHOOK: Input: default@store +#### A masked pattern was here #### +POSTHOOK: query: analyze table store compute statistics for columns s_store_sk, s_floor_space +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store +#### A masked pattern was here #### +PREHOOK: query: analyze table store_sales compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales +POSTHOOK: query: analyze table store_sales compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales +PREHOOK: query: analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +PREHOOK: query: analyze table customer_address compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Output: default@customer_address +POSTHOOK: query: analyze table customer_address compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Output: default@customer_address +PREHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +#### A masked pattern was here #### +POSTHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +#### A masked pattern was here #### +PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ss + Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE + Map 3 + Map Operator Tree: + TableScan + alias: s + Statistics: Num 
rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: s_store_sk is not null (type: boolean)
+                Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+              outputColumnNames: _col1
+              Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: (ss_store_sk > 0) (type: boolean)
+                Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 3
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: (s_store_sk > 0) (type: boolean)
+                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+              outputColumnNames: _col1
+              Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_company_id > 0 and ss.ss_quantity > 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_company_id > 0 and ss.ss_quantity > 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean)
+                Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 321 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 321 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 3
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: PARTIAL
+              Filter Operator
+                predicate: ((s_company_id > 0) and s_store_sk is not null) (type: boolean)
+                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+              outputColumnNames: _col2
+              Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL
+              Select Operator
+                expressions: _col2 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ss_store_sk is not null (type: boolean)
+                Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 3
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean)
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+              outputColumnNames: _col1
+              Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean)
+                Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 321 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 321 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 3
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: s_store_sk is not null (type: boolean)
+                Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+              outputColumnNames: _col2
+              Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col2 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join store s1 on (s1.s_store_sk = ss.ss_store_sk)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join store s1 on (s1.s_store_sk = ss.ss_store_sk)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ss_store_sk is not null (type: boolean)
+                Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 3
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: s_store_sk is not null (type: boolean)
+                Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 4
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: s_store_sk is not null (type: boolean)
+                Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+                Inner Join 0 to 2
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+                2 _col0 (type: int)
+              outputColumnNames: _col1
+              Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join store s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 1000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join store s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 1000
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: (ss_store_sk > 1000) (type: boolean)
+                Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 3
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: (s_store_sk > 1000) (type: boolean)
+                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 4
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: (s_store_sk > 1000) (type: boolean)
+                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+                Inner Join 0 to 2
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+                2 _col0 (type: int)
+              outputColumnNames: _col1
+              Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join store s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 1000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join store s1 on (s1.s_store_sk = ss.ss_store_sk) where s.s_floor_space > 1000
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ss_store_sk is not null (type: boolean)
+                Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 964 Data size: 3716 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 3
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean)
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 4
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: s_store_sk is not null (type: boolean)
+                Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+                Inner Join 0 to 2
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+                2 _col0 (type: int)
+              outputColumnNames: _col1
+              Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join store s1 on (s1.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join store s1 on (s1.s_store_sk = ss.ss_store_sk) where ss.ss_quantity > 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean)
+                Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 321 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 321 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 3
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: s_store_sk is not null (type: boolean)
+                Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 4
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: s_store_sk is not null (type: boolean)
+                Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+                Inner Join 0 to 2
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+                2 _col0 (type: int)
+              outputColumnNames: _col2
+              Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col2 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join customer_address ca on (ca.ca_address_sk = ss.ss_addr_sk)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join customer_address ca on (ca.ca_address_sk = ss.ss_addr_sk)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+          Map Operator Tree:
+            TableScan
+              alias: ss
+              Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: (ss_addr_sk is not null and ss_store_sk is not null) (type: boolean)
+                Statistics: Num rows: 916 Data size: 7012 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ss_addr_sk (type: int), ss_store_sk (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 916 Data size: 7012 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 916 Data size: 7012 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: int)
+        Map 4
+          Map Operator Tree:
+            TableScan
+              alias: ca
+              Statistics: Num rows: 20 Data size: 2114 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: ca_address_sk is not null (type: boolean)
+                Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: ca_address_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 5
+          Map Operator Tree:
+            TableScan
+              alias: s
+              Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE
+              Filter Operator
+                predicate: s_store_sk is not null (type: boolean)
+                Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: s_store_sk (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 2
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+              keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+              outputColumnNames: _col1
+              Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Map-reduce partition columns: _col1 (type: int)
+                Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3
+          Reduce Operator Tree:
+            Merge Join Operator
+              condition map:
+                Inner Join 0 to 1
+              keys:
+                0 _col1 (type: int)
+                1 _col0 (type: int)
+              outputColumnNames: _col3
+              Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col3 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 210 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: drop table store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: drop table store
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store
+PREHOOK: Output: default@store
+POSTHOOK: query: drop table store
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store
+POSTHOOK: Output: default@store
+PREHOOK: query: drop table customer_address
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@customer_address
+PREHOOK: Output: default@customer_address
+POSTHOOK: query: drop table customer_address
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Output: default@customer_address
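
The row counts in the plans above reflect the PK-FK inference path that this golden file exercises: when one join side is the primary-key table (store, 12 rows) and the other the foreign-key table (store_sales), the join estimate is the FK side's surviving row count scaled by the PK side's filter selectivity, e.g. 964 * 4/12 rounds up to 322 for the s_floor_space plans and 333 * 4/12 = 111 for s_store_sk > 0. The Java sketch below reproduces only that arithmetic as an illustration; the class, method, and parameter names are invented here and are not the ones used in StatsRulesProcFactory.

// Illustrative sketch of the PK-FK cardinality arithmetic visible in the plans above.
// Hypothetical names; this is not the Hive implementation.
public final class PkFkJoinEstimate {

  /**
   * @param fkRows      rows surviving on the foreign-key side (e.g. 964 or 333)
   * @param pkRowsBase  unfiltered rows on the primary-key side (e.g. 12)
   * @param pkRowsAfter rows on the primary-key side after its filters (e.g. 4)
   * @return estimated join cardinality, rounding up as the plans above do
   */
  static long estimate(long fkRows, long pkRowsBase, long pkRowsAfter) {
    if (pkRowsBase <= 0) {
      return fkRows; // no basis for scaling; keep the FK-side estimate unchanged
    }
    double selectivity = (double) pkRowsAfter / (double) pkRowsBase;
    return (long) Math.ceil(fkRows * selectivity);
  }

  public static void main(String[] args) {
    // s_floor_space > 0 keeps 4 of 12 store rows; 964 store_sales rows pass the
    // ss_store_sk IS NOT NULL filter, giving the 322-row join estimate in the plan.
    System.out.println(estimate(964, 12, 4)); // 322
    // s_store_sk > 0 also filters the FK side down to 333 rows first.
    System.out.println(estimate(333, 12, 4)); // 111
    // ss_quantity > 10 filters only the FK side, so the estimate stays at 321.
    System.out.println(estimate(321, 12, 12)); // 321
  }
}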