diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 819eef1..5386661 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -24,6 +24,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -90,7 +91,6 @@
     int numBuckets = 1;
     LOG.info("Estimated number of buckets " + numBuckets);
-    int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets);
     /* TODO: handle this later
     if (mapJoinConversionPos < 0) {
@@ -153,8 +153,8 @@
       LOG.info("Convert to non-bucketed map join");
       // check if we can convert to a map join with no bucket scaling.
-      mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
-
+      ObjectPair<Integer, Long> mapJoinInfo = getMapJoinConversionInfo(joinOp, context, 1);
+      int mapJoinConversionPos = mapJoinInfo.getFirst();
       if (mapJoinConversionPos < 0) {
         // we are just converting to a common merge join operator. The shuffle
@@ -175,6 +175,8 @@
       setAllChildrenTraitsToNull(childOp);
     }
 
+    context.getMjOpSizes().put(mapJoinOp, mapJoinInfo.getSecond());
+
     return null;
   }
@@ -311,32 +313,39 @@ private void setNumberOfBucketsOnChildren(Operator<? extends OperatorDesc> curre
    * @param joinOp
    * @param context
    * @param buckets
-   * @return
+   * @return a pair: the first value is the big-table position, the second is the in-memory size of this mapjoin.
    */
-  private int getMapJoinConversionPos(JoinOperator joinOp, OptimizeSparkProcContext context,
-      int buckets) {
+  private ObjectPair<Integer, Long> getMapJoinConversionInfo(JoinOperator joinOp, OptimizeSparkProcContext context,
+      int buckets) {
     Set<Integer> bigTableCandidateSet =
-        MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds());
+        MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds());
     long maxSize = context.getConf().getLongVar(
-        HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
+        HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
 
-    int bigTablePosition = -1;
+    int currBigTablePos = -1;
+    long currBigTableSize = 0;
 
-    Statistics bigInputStat = null;
-    long totalSize = 0;
+    long totalSmallTableSizes = 0; // total in-memory size of the small tables for this mapjoin.
     int pos = 0;
 
-    // bigTableFound means we've encountered a table that's bigger than the
+    // mandatoryBigTableFound means we've encountered a table that's bigger than the
     // max. This table is either the big table or we cannot convert.
-    boolean bigTableFound = false;
+    boolean mandatoryBigTableFound = false;
+
+    // intermediate state, keeps track of whether a big-table candidate has been set.
+    boolean bigTableCandidateFound = false;
+
+    // First, calculate the size of already-converted mapjoin operators in the same work (Spark stage).
+    // We need to factor this in to avoid overwhelming Spark executor memory.
+    long connectedMapJoinSize = getConnectedMapJoinSize(joinOp, context);
 
     for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
       Statistics currInputStat = parentOp.getStatistics();
       if (currInputStat == null) {
-        LOG.warn("Couldn't get statistics from: "+parentOp);
-        return -1;
+        LOG.warn("Skipping mapjoin, as we couldn't get statistics from: " + parentOp);
+        return new ObjectPair<Integer, Long>(-1, 0L);
       }
 
       // Union is hard to handle.
      // For instance, the following case:
@@ -359,57 +368,133 @@ private int getMapJoinConversionPos(JoinOperator joinOp, OptimizeSparkProcContex
      // But, this is tricky to implement, and we'll leave it as future work for now.
      // TODO: handle this as a MJ case
      if (containUnionWithoutRS(parentOp.getParentOperators().get(0))) {
-        return -1;
+        return new ObjectPair<Integer, Long>(-1, 0L);
      }
 
-      long inputSize = currInputStat.getDataSize();
-      if ((bigInputStat == null) ||
-          ((bigInputStat != null) &&
-          (inputSize > bigInputStat.getDataSize()))) {
+      long currInputSize = currInputStat.getDataSize() + connectedMapJoinSize;
+
+      if (!bigTableCandidateFound || currInputSize > currBigTableSize) {
+        // No candidate yet for the big table, or there is one but this input is bigger.
+        // Explore whether this is a good candidate.
 
-        if (bigTableFound) {
-          // cannot convert to map join; we've already chosen a big table
-          // on size and there's another one that's bigger.
-          return -1;
+        if (mandatoryBigTableFound) {
+          // cannot convert to map join; we've already chosen another table
+          // that won't fit into memory.
+          return new ObjectPair<Integer, Long>(-1, 0L);
        }
 
-        if (inputSize/buckets > maxSize) {
+        if (currInputSize / buckets > maxSize) {
+          // oversized table that must be the big table.
+
          if (!bigTableCandidateSet.contains(pos)) {
-            // can't use the current table as the big table, but it's too
-            // big for the map side.
-            return -1;
+            // can't use the current table as the big table
+            return new ObjectPair<Integer, Long>(-1, 0L);
+          } else {
+            mandatoryBigTableFound = true;
          }
-
-          bigTableFound = true;
        }
 
-        if (bigInputStat != null) {
+        if (currBigTableSize != 0) {
          // we're replacing the current big table with a new one. Need
          // to count the current one as a map table then.
-          totalSize += bigInputStat.getDataSize();
+          totalSmallTableSizes += currBigTableSize;
        }
 
-        if (totalSize/buckets > maxSize) {
+        if (totalSmallTableSizes / buckets > maxSize) {
          // sum of small tables size in this join exceeds configured limit
          // hence cannot convert.
-          return -1;
+          return new ObjectPair<Integer, Long>(-1, 0L);
        }
 
        if (bigTableCandidateSet.contains(pos)) {
-          bigTablePosition = pos;
-          bigInputStat = currInputStat;
+          currBigTablePos = pos;
+          currBigTableSize = currInputSize;
+          bigTableCandidateFound = true;
        }
      } else {
-        totalSize += currInputStat.getDataSize();
-        if (totalSize/buckets > maxSize) {
+        totalSmallTableSizes += currInputSize;
+        if (totalSmallTableSizes / buckets > maxSize) {
          // cannot hold all map tables in memory. Cannot convert.
-          return -1;
+          return new ObjectPair<Integer, Long>(-1, 0L);
        }
      }
      pos++;
    }
 
-    return bigTablePosition;
+    return new ObjectPair<Integer, Long>(currBigTablePos, totalSmallTableSizes);
+  }
+
+  /**
+   * Examines this operator and all the connected operators, for mapjoins that will be in the same work.
+   * @param op given operator
+   * @param ctx context to pass information.
+   * @return total size of connected mapjoins in the same work as this operator.
+   */
+  private long getConnectedMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx) {
+    long result = 0;
+    for (Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
+      // As the first op is still an RS that will be removed if converted to mapjoin, we need to
+      // force exploration here.
+      result += getConnectedParentMapJoinSize(parentOp, ctx, true);
+    }
+    result += getConnectedChildMapJoinSize(op, ctx);
+    return result;
+  }
+
+  /**
+   * Examines this operator and all the parents, for mapjoins that will be in the same work.
+   * @param op given operator
+   * @param ctx context to pass information.
+   * @param forceContinue forces exploration even if we hit a reduce-sink, which normally marks a work boundary.
+   *                      Note this is before selection of the big table, so reduce-sinks are not removed yet.
+   * @return total size of parent mapjoins in the same work as this operator.
+   */
+  private long getConnectedParentMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx,
+      boolean forceContinue) {
+    if (!forceContinue && ((op instanceof UnionOperator) || (op instanceof ReduceSinkOperator))) {
+      // Work boundary; stop exploring.
+      return 0;
+    }
+
+    if (op instanceof MapJoinOperator) {
+      // Found a parent mapjoin operator. Its size should already reflect any other mapjoins connected to it.
+      return ctx.getMjOpSizes().get((MapJoinOperator) op);
+    }
+
+    long result = 0;
+    for (Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
+      // Else, recurse up the parents.
+      result += getConnectedParentMapJoinSize(parentOp, ctx, false);
+    }
+    return result;
+  }
+
+  /**
+   * Examines this operator and all the children, for mapjoins that will be in the same work.
+   * @param op given operator
+   * @param ctx context to pass information.
+   * @return total size of child mapjoins in the same work as this operator.
+   */
+  private long getConnectedChildMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx) {
+    if ((op instanceof UnionOperator) || (op instanceof ReduceSinkOperator)) {
+      // Work boundary; stop exploring.
+      return 0;
+    }
+
+    if (op instanceof MapJoinOperator) {
+      // Found a child mapjoin operator. Its size should already reflect any mapjoins connected to it,
+      // so stop processing.
+      return ctx.getMjOpSizes().get((MapJoinOperator) op);
+    }
+
+    long result = 0;
+    for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) {
+      // Else, recurse to the children.
+      result += getConnectedChildMapJoinSize(childOp, ctx);
+    }
+    return result;
+  }
 
   /*
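Outside the patch, a minimal, self-contained sketch of the bookkeeping the two traversal methods above implement. The type Op, the map MJ_OP_SIZES, and the method names are hypothetical stand-ins for Hive's Operator tree and OptimizeSparkProcContext.getMjOpSizes(); the walk mirrors getConnectedParentMapJoinSize: recurse upward, stop at work boundaries (reduce sinks and unions), force exploration through the immediate parent RS, and return a previously recorded mapjoin footprint when one is found.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Simplified stand-in for Hive's operator tree (hypothetical, for illustration only).
class Op {
  final String name;
  final boolean isWorkBoundary;  // models ReduceSinkOperator / UnionOperator
  final boolean isMapJoin;       // models MapJoinOperator
  final List<Op> parents = new ArrayList<>();
  Op(String name, boolean isWorkBoundary, boolean isMapJoin) {
    this.name = name;
    this.isWorkBoundary = isWorkBoundary;
    this.isMapJoin = isMapJoin;
  }
}

public class ConnectedMapJoinSizeSketch {
  // Mirrors OptimizeSparkProcContext.getMjOpSizes(): footprints recorded as joins are converted.
  static final Map<Op, Long> MJ_OP_SIZES = new HashMap<>();

  // Mirrors getConnectedParentMapJoinSize: recurse up, stop at work boundaries,
  // and return a previously recorded mapjoin size when one is found.
  static long parentMapJoinSize(Op op, boolean forceContinue) {
    if (!forceContinue && op.isWorkBoundary) {
      return 0;                                // work boundary, stop exploring
    }
    if (op.isMapJoin) {
      return MJ_OP_SIZES.getOrDefault(op, 0L); // already includes its own connected joins
    }
    long result = 0;
    for (Op parent : op.parents) {
      result += parentMapJoinSize(parent, false);
    }
    return result;
  }

  public static void main(String[] args) {
    // An already-converted mapjoin feeds, through an RS, the join we are now considering.
    Op mj = new Op("mapjoin(src1,src2)", false, true);
    MJ_OP_SIZES.put(mj, 2656L);                // recorded when that join was converted
    Op rs = new Op("RS", true, false);
    rs.parents.add(mj);
    Op join = new Op("join(+smalltable)", false, false);
    join.parents.add(rs);

    // The direct parent is still an RS that conversion would remove, so force
    // exploration through it, as the patch does for immediate parents.
    long connected = 0;
    for (Op parent : join.parents) {
      connected += parentMapJoinSize(parent, true);
    }
    System.out.println("connected mapjoin size = " + connected); // 2656
  }
}

The child-side walk (getConnectedChildMapJoinSize) is the mirror image; together the two sums approximate how much small-table memory a single Spark work would have to hold if this join were also converted.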
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
index 0c339a5..f7586a4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse.spark;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -28,7 +29,9 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 import java.util.Deque;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.Set;
 
 /**
@@ -44,6 +47,7 @@ private final Set<ReadEntity> inputs;
   private final Set<WriteEntity> outputs;
   private final Set<ReduceSinkOperator> visitedReduceSinks = new HashSet<ReduceSinkOperator>();
+  private final Map<MapJoinOperator, Long> mjOpSizes = new HashMap<MapJoinOperator, Long>();
 
   // rootOperators are all the table scan operators in sequence
   // of traversal
@@ -83,4 +87,8 @@ public HiveConf getConf() {
   public Deque<Operator<? extends OperatorDesc>> getRootOperators() {
     return rootOperators;
   }
+
+  public Map<MapJoinOperator, Long> getMjOpSizes() {
+    return mjOpSizes;
+  }
 }
diff --git ql/src/test/queries/clientpositive/auto_join_stats.q ql/src/test/queries/clientpositive/auto_join_stats.q
new file mode 100644
index 0000000..de56abd
--- /dev/null
+++ ql/src/test/queries/clientpositive/auto_join_stats.q
@@ -0,0 +1,19 @@
+set hive.auto.convert.join = true;
+set hive.auto.convert.join.noconditionaltask.size=2660;
+
+-- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable;
+analyze table smalltable compute statistics;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+
+create table smalltable2(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable2;
+analyze table smalltable compute statistics;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/auto_join_stats2.q ql/src/test/queries/clientpositive/auto_join_stats2.q
new file mode 100644
index 0000000..bf39c95
--- /dev/null
+++ ql/src/test/queries/clientpositive/auto_join_stats2.q
@@ -0,0 +1,17 @@
+set hive.auto.convert.join = true;
+
+-- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
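(An editorial aside, not part of the patch: a quick sanity check of the threshold arithmetic behind these two tests, using the estimated data sizes that appear in the plans below. In auto_join_stats.q, hive.auto.convert.join.noconditionaltask.size=2660 is presumably chosen against the plan statistics: the filtered src scan is estimated at 2656 bytes and the analyzed smalltable at 24 bytes, so 2656 < 2660 < 2656 + 24 = 2680. The first join's hashtable fits under the threshold on its own, but once its 2656 bytes are counted as connected-mapjoin size, adding smalltable overflows the limit; in the Spark plan below the second join accordingly stays a shuffle join rather than being folded into the same work. auto_join_stats2.q leaves the threshold at its default, so all the small tables fit and the mapjoins do share a work.)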
+ +create table smalltable(key string, value string) stored as textfile; +load data local inpath '../../data/files/T1.txt' into table smalltable; + +explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key); +select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key); + +create table smalltable2(key string, value string) stored as textfile; +load data local inpath '../../data/files/T1.txt' into table smalltable2; +analyze table smalltable compute statistics; + +explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key); +select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key); \ No newline at end of file diff --git ql/src/test/results/clientpositive/auto_join_stats.q.out ql/src/test/results/clientpositive/auto_join_stats.q.out new file mode 100644 index 0000000..5181d3a --- /dev/null +++ ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -0,0 +1,545 @@ +PREHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable). +-- This query plan should thus not try to combine the mapjoin into a single work. + +create table smalltable(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable +POSTHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable). +-- This query plan should thus not try to combine the mapjoin into a single work. 
+ +create table smalltable(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-8 is a root stage , consists of Stage-10, Stage-11, Stage-1 + Stage-10 has a backup stage: Stage-1 + Stage-6 depends on stages: Stage-10 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-9 + Stage-11 has a backup stage: Stage-1 + Stage-7 depends on stages: Stage-11 + Stage-1 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-8 + Conditional Operator + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src2 + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltable + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltable + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + 
_col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1 + Stage-13 has a backup stage: Stage-1 + Stage-9 depends on stages: Stage-13 + Stage-12 depends on stages: Stage-1, Stage-9, Stage-10 + Stage-7 depends on stages: Stage-12 + Stage-14 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-14 + Stage-1 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-11 + Conditional Operator + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src2 + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltable + Fetch Operator + limit: -1 + smalltable2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltable + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + smalltable2 + TableScan + alias: smalltable2 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8 diff --git ql/src/test/results/clientpositive/auto_join_stats2.q.out ql/src/test/results/clientpositive/auto_join_stats2.q.out new file mode 100644 index 0000000..ed7f62b --- /dev/null +++ ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -0,0 +1,311 @@ +PREHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2. +-- Hence forcing the third table to be smaller. 
+ +create table smalltable(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable +POSTHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2. +-- Hence forcing the third table to be smaller. + +create table smalltable(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltable + Fetch Operator + limit: -1 + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltable + TableScan + alias: smalltable + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + src1 + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-7 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltable + Fetch Operator + limit: -1 + smalltable2 + Fetch Operator + limit: -1 + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltable + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition 
expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + smalltable2 + TableScan + alias: smalltable2 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + src1 + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### 
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8 diff --git ql/src/test/results/clientpositive/spark/auto_join_stats.q.out ql/src/test/results/clientpositive/spark/auto_join_stats.q.out new file mode 100644 index 0000000..7c684ad --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join_stats.q.out @@ -0,0 +1,347 @@ +PREHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable). +-- This query plan should thus not try to combine the mapjoin into a single work. + +create table smalltable(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable +POSTHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable). +-- This query plan should thus not try to combine the mapjoin into a single work. + +create table smalltable(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string) + Local Work: + Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col5} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: load data local inpath 
'../../data/files/T1.txt' into table smalltable2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: smalltable2 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string) + Local Work: + Map Reduce Local Work + Map 5 + Map Operator Tree: + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value 
expressions: key (type: string) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col5} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + input vertices: + 1 Map 2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8 diff --git ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out new file mode 100644 index 0000000..f7a4a12 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out @@ -0,0 +1,327 @@ +PREHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2. +-- Hence forcing the third table to be smaller. 
+ +create table smalltable(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable +POSTHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2. +-- Hence forcing the third table to be smaller. + +create table smalltable(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 3 + Map Operator Tree: + TableScan + alias: smalltable + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + 
input vertices: + 1 Map 3 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + 
keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan + alias: smalltable2 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + input vertices: + 1 Map 4 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + input vertices: + 1 Map 2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) 
JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8
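
For reference, the test setup above condenses to the q-file sketch below, which should reproduce the second EXPLAIN plan. It assumes the standard Hive qtest environment (the pre-populated `src` table and `../../data/files/T1.txt`); the `set` commands and the threshold value are illustrative assumptions and are not part of this diff.

    -- Sketch of the test sequence behind the plans above. The config values
    -- here are illustrative assumptions, not part of the patch.
    set hive.execution.engine=spark;
    set hive.auto.convert.join=true;
    set hive.auto.convert.join.noconditionaltask=true;
    -- The small-table sides broadcast into one Spark work must fit, combined,
    -- under this threshold for every join here to stay a mapjoin.
    set hive.auto.convert.join.noconditionaltask.size=10000000;

    create table smalltable(key string, value string) stored as textfile;
    load data local inpath '../../data/files/T1.txt' into table smalltable;

    create table smalltable2(key string, value string) stored as textfile;
    load data local inpath '../../data/files/T1.txt' into table smalltable2;

    -- Only smalltable gets stats, matching the output above: smalltable shows
    -- COMPLETE basic stats (6 rows) while smalltable2 remains PARTIAL.
    analyze table smalltable compute statistics;

    explain
    select src1.key, src2.key, smalltable.key
    from src src1
    join src src2 on (src1.key = src2.key)
    join smalltable on (src1.key + src2.key = smalltable.key)
    join smalltable2 on (src1.key + src2.key = smalltable2.key);

In the resulting plan, all three hash-table sides (Maps 1, 2 and 4) are built in Stage-2 and broadcast into the single Stage-1 work (Map 3), whose Local Work loads every hash table at once; this is why the conversion decision has to consider the small tables' combined size rather than each join in isolation.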