diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 09c667e..c2f6084 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -491,6 +491,8 @@ spark.query.files=add_part_multiple.q, \
   auto_join_filters.q, \
   auto_join_nulls.q, \
   auto_join_reordering_values.q, \
+  auto_join_stats.q, \
+  auto_join_stats2.q, \
   auto_join_without_localtask.q, \
   auto_smb_mapjoin_14.q, \
   auto_sortmerge_join_1.q, \
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 8b78123..c8d0ecd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -24,6 +24,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -90,7 +91,6 @@
     int numBuckets = 1;
 
     LOG.info("Estimated number of buckets " + numBuckets);
-    int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets);
 
     /* TODO: handle this later
     if (mapJoinConversionPos < 0) {
@@ -153,8 +153,8 @@
     LOG.info("Convert to non-bucketed map join");
 
     // check if we can convert to a map join with no bucket scaling.
-    mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
-
+    ObjectPair<Integer, Long> mapJoinInfo = getMapJoinConversionInfo(joinOp, context, 1);
+    int mapJoinConversionPos = mapJoinInfo.getFirst();
     if (mapJoinConversionPos < 0) {
       // we are just converting to a common merge join operator. The shuffle
@@ -175,6 +175,8 @@
       setAllChildrenTraitsToNull(childOp);
     }
 
+    context.getMjOpSizes().put(mapJoinOp, mapJoinInfo.getSecond());
+
     return null;
   }
@@ -311,10 +313,10 @@ private void setNumberOfBucketsOnChildren(Operator curre
    * @param joinOp
    * @param context
    * @param buckets
-   * @return
+   * @return a pair: the first value is the big-table position (-1 if the join cannot be converted), the second value is the in-memory size of this mapjoin.
    */
-  private int getMapJoinConversionPos(JoinOperator joinOp, OptimizeSparkProcContext context,
-      int buckets) {
+  private ObjectPair<Integer, Long> getMapJoinConversionInfo(JoinOperator joinOp, OptimizeSparkProcContext context,
+      int buckets) {
     Set<Integer> bigTableCandidateSet = MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds());
@@ -336,7 +338,7 @@
       Statistics currInputStat = parentOp.getStatistics();
       if (currInputStat == null) {
         LOG.warn("Couldn't get statistics from: "+parentOp);
-        return -1;
+        return new ObjectPair<Integer, Long>(-1, 0L);
       }
 
       // Union is hard to handle. For instance, the following case:
@@ -359,7 +361,7 @@
       // But, this is tricky to implement, and we'll leave it as a future work for now.
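To make the accounting above concrete, here is a minimal, self-contained sketch of what getMapJoinConversionInfo and getConnectedParentMapJoinSize do together: each converted mapjoin records its total in-memory size in the context (the mjOpSizes map added below), and a new conversion is rejected when the connected total would exceed the noconditionaltask threshold. All class names, the Op/Kind model, and the byte counts here are invented for illustration; they are not Hive APIs.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MapJoinSizeSketch {

  enum Kind { SCAN, MAPJOIN, REDUCE_SINK } // REDUCE_SINK marks a work boundary

  static final class Op {
    final Kind kind;
    final List<Op> parents = new ArrayList<>();
    Op(Kind kind) { this.kind = kind; }
  }

  // Stand-in for OptimizeSparkProcContext.getMjOpSizes(): in-memory sizes of
  // mapjoins that were already converted, keyed by operator.
  static final Map<Op, Long> mjOpSizes = new HashMap<>();

  // Mirrors getConnectedParentMapJoinSize: walk up from an operator, summing the
  // recorded sizes of mapjoins in the same work; stop at work boundaries.
  static long connectedParentMapJoinSize(Op op) {
    if (op.kind == Kind.REDUCE_SINK) {
      return 0; // work boundary: a shuffle separates works, so stop exploring
    }
    if (op.kind == Kind.MAPJOIN) {
      // A converted mapjoin's recorded size already includes mapjoins connected to it.
      return mjOpSizes.get(op);
    }
    long result = 0;
    for (Op parent : op.parents) {
      result += connectedParentMapJoinSize(parent);
    }
    return result;
  }

  public static void main(String[] args) {
    // Hypothetical numbers, shaped like the auto_join_stats.q setup where
    // HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
    long threshold = 2660;          // hive.auto.convert.join.noconditionaltask.size
    long firstHashTableSize = 2000; // size of the already-converted mapjoin
    long secondSmallTables = 1000;  // small-table bytes the candidate mapjoin adds

    Op scan = new Op(Kind.SCAN);
    Op mj1 = new Op(Kind.MAPJOIN);
    mj1.parents.add(scan);
    mjOpSizes.put(mj1, firstHashTableSize);

    // The candidate mapjoin's big-table parent chain contains mj1, i.e. both
    // joins would land in the same map work unless this check rejects it.
    long alreadyUsed = connectedParentMapJoinSize(mj1);
    boolean convert = alreadyUsed + secondSmallTables <= threshold;
    System.out.println("connected size = " + alreadyUsed + ", convert = " + convert);
    // Prints: connected size = 2000, convert = false
  }
}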
      // TODO: handle this as a MJ case
      if (containUnionWithoutRS(parentOp.getParentOperators().get(0))) {
-        return -1;
+        return new ObjectPair<Integer, Long>(-1, 0L);
      }
 
      long inputSize = currInputStat.getDataSize();
@@ -370,14 +372,14 @@
        if (bigTableFound) {
          // cannot convert to map join; we've already chosen a big table
          // on size and there's another one that's bigger.
-          return -1;
+          return new ObjectPair<Integer, Long>(-1, 0L);
        }
 
        if (inputSize/buckets > maxSize) {
          if (!bigTableCandidateSet.contains(pos)) {
            // can't use the current table as the big table, but it's too
            // big for the map side.
-            return -1;
+            return new ObjectPair<Integer, Long>(-1, 0L);
          }
 
          bigTableFound = true;
@@ -392,7 +394,7 @@
        if (totalSize/buckets > maxSize) {
          // the sum of the small tables' sizes in this join exceeds the configured limit,
          // hence cannot convert.
-          return -1;
+          return new ObjectPair<Integer, Long>(-1, 0L);
        }
 
        if (bigTableCandidateSet.contains(pos)) {
@@ -403,13 +405,93 @@
          totalSize += currInputStat.getDataSize();
          if (totalSize/buckets > maxSize) {
            // cannot hold all map tables in memory. Cannot convert.
-            return -1;
+            return new ObjectPair<Integer, Long>(-1, 0L);
          }
        }
        pos++;
      }
 
-    return bigTablePosition;
+    if (bigTablePosition == -1) {
+      // No big-table candidates.
+      return new ObjectPair<Integer, Long>(-1, 0L);
+    }
+
+    // Final check: add in the sizes of mapjoin operators already placed in the same work
+    // (Spark stage). We need to factor these in to avoid overwhelming Spark executor memory.
+    long connectedMapJoinSize = getConnectedMapJoinSize(joinOp.getParentOperators().get(bigTablePosition), joinOp, context);
+    if ((connectedMapJoinSize + (totalSize / buckets)) > maxSize) {
+      return new ObjectPair<Integer, Long>(-1, 0L);
+    }
+
+    return new ObjectPair<Integer, Long>(bigTablePosition, connectedMapJoinSize + (totalSize / buckets));
+  }
+
+  /**
+   * Examines this operator and all connected operators for mapjoins that will be in the same work.
+   * @param parentOp potential big-table parent operator; explore up from this.
+   * @param joinOp potential mapjoin operator; explore down from this.
+   * @param ctx context to pass information.
+   * @return total size of connected mapjoins in the same work as this operator.
+   */
+  private long getConnectedMapJoinSize(Operator<? extends OperatorDesc> parentOp, Operator<? extends OperatorDesc> joinOp, OptimizeSparkProcContext ctx) {
+    long result = 0;
+    for (Operator<? extends OperatorDesc> grandParentOp : parentOp.getParentOperators()) {
+      result += getConnectedParentMapJoinSize(grandParentOp, ctx);
+    }
+    result += getConnectedChildMapJoinSize(joinOp, ctx);
+    return result;
+  }
+
+  /**
+   * Examines this operator and all its parents for mapjoins that will be in the same work.
+   * @param op given operator
+   * @param ctx context to pass information.
+   * @return total size of parent mapjoins in the same work as this operator.
+   */
+  private long getConnectedParentMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx) {
+    if ((op instanceof UnionOperator) || (op instanceof ReduceSinkOperator)) {
+      // Work boundary; stop exploring.
+      return 0;
+    }
+
+    if (op instanceof MapJoinOperator) {
+      // Found a parent mapjoin operator. Its size should already reflect any other
+      // mapjoins connected to it.
+      long mjSize = ctx.getMjOpSizes().get(op);
+      return mjSize;
+    }
+
+    long result = 0;
+    for (Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
+      // Else, recurse up the parents.
+      result += getConnectedParentMapJoinSize(parentOp, ctx);
+    }
+    return result;
+  }
+
+  /**
+   * Examines this operator and all its children for mapjoins that will be in the same work.
+   * @param op given operator
+   * @param ctx context to pass information.
+   * @return total size of child mapjoins in the same work as this operator.
+   */
+  private long getConnectedChildMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx) {
+    if ((op instanceof UnionOperator) || (op instanceof ReduceSinkOperator)) {
+      // Work boundary; stop exploring.
+      return 0;
+    }
+
+    if (op instanceof MapJoinOperator) {
+      // Found a child mapjoin operator. Its size should already reflect any mapjoins
+      // connected to it, so stop processing.
+      long mjSize = ctx.getMjOpSizes().get(op);
+      return mjSize;
+    }
+
+    long result = 0;
+    for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) {
+      // Else, recurse to the children.
+      result += getConnectedChildMapJoinSize(childOp, ctx);
+    }
+    return result;
   }
 
 /*
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
index 0c339a5..f7586a4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse.spark;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -28,7 +29,9 @@
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 import java.util.Deque;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.Set;
 
 /**
@@ -44,6 +47,7 @@
   private final Set<ReadEntity> inputs;
   private final Set<WriteEntity> outputs;
   private final Set<ReduceSinkOperator> visitedReduceSinks = new HashSet<ReduceSinkOperator>();
+  private final Map<MapJoinOperator, Long> mjOpSizes = new HashMap<MapJoinOperator, Long>();
 
   // rootOperators are all the table scan operators in sequence
   // of traversal
@@ -83,4 +87,8 @@ public HiveConf getConf() {
   public Deque<Operator<? extends OperatorDesc>> getRootOperators() {
     return rootOperators;
   }
+
+  public Map<MapJoinOperator, Long> getMjOpSizes() {
+    return mjOpSizes;
+  }
 }
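The two new qfiles below exercise this check from both sides. auto_join_stats.q pins hive.auto.convert.join.noconditionaltask.size at 2660 so that HTS(src2) <= 2660 < HTS(src2) + HTS(smalltable): the join with src2 converts to a mapjoin, but adding smalltable's hash table to the same work would cross the threshold, so in the spark golden output the second join stays a shuffle join in a separate work (Reducer 2). auto_join_stats2.q leaves the threshold at its default, so all the hash tables fit together and the mapjoins chain inside a single map work (Map 1 in its spark output).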
diff --git ql/src/test/queries/clientpositive/auto_join_stats.q ql/src/test/queries/clientpositive/auto_join_stats.q
new file mode 100644
index 0000000..de56abd
--- /dev/null
+++ ql/src/test/queries/clientpositive/auto_join_stats.q
@@ -0,0 +1,19 @@
+set hive.auto.convert.join = true;
+set hive.auto.convert.join.noconditionaltask.size=2660;
+
+-- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+ +create table smalltable(key string, value string) stored as textfile; +load data local inpath '../../data/files/T1.txt' into table smalltable; +analyze table smalltable compute statistics; + +explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key); +select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key); + +create table smalltable2(key string, value string) stored as textfile; +load data local inpath '../../data/files/T1.txt' into table smalltable2; +analyze table smalltable compute statistics; + +explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key); +select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/auto_join_stats2.q ql/src/test/queries/clientpositive/auto_join_stats2.q new file mode 100644 index 0000000..bf39c95 --- /dev/null +++ ql/src/test/queries/clientpositive/auto_join_stats2.q @@ -0,0 +1,17 @@ +set hive.auto.convert.join = true; + +-- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2. +-- Hence forcing the third table to be smaller. + +create table smalltable(key string, value string) stored as textfile; +load data local inpath '../../data/files/T1.txt' into table smalltable; + +explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key); +select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key); + +create table smalltable2(key string, value string) stored as textfile; +load data local inpath '../../data/files/T1.txt' into table smalltable2; +analyze table smalltable compute statistics; + +explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key); +select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key); \ No newline at end of file diff --git ql/src/test/results/clientpositive/auto_join_stats.q.out ql/src/test/results/clientpositive/auto_join_stats.q.out new file mode 100644 index 0000000..7aff4d2 --- /dev/null +++ ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -0,0 +1,545 @@ +PREHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable). +-- This query plan should thus not try to combine the mapjoin into a single work. + +create table smalltable(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable +POSTHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable). +-- This query plan should thus not try to combine the mapjoin into a single work. 
+ +create table smalltable(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-8 is a root stage , consists of Stage-10, Stage-11, Stage-1 + Stage-10 has a backup stage: Stage-1 + Stage-6 depends on stages: Stage-10 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-9 + Stage-11 has a backup stage: Stage-1 + Stage-7 depends on stages: Stage-11 + Stage-1 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-8 + Conditional Operator + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src2 + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltable + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltable + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + 
_col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1 + Stage-13 has a backup stage: Stage-1 + Stage-9 depends on stages: Stage-13 + Stage-12 depends on stages: Stage-1, Stage-9, Stage-10 + Stage-7 depends on stages: Stage-12 + Stage-14 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-14 + Stage-1 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-11 + Conditional Operator + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src2 + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltable + Fetch Operator + limit: -1 + smalltable2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltable + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + smalltable2 + TableScan + alias: smalltable2 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8 diff --git ql/src/test/results/clientpositive/auto_join_stats2.q.out ql/src/test/results/clientpositive/auto_join_stats2.q.out new file mode 100644 index 0000000..ed7f62b --- /dev/null +++ ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -0,0 +1,311 @@ +PREHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2. +-- Hence forcing the third table to be smaller. 
+ +create table smalltable(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable +POSTHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2. +-- Hence forcing the third table to be smaller. + +create table smalltable(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltable + Fetch Operator + limit: -1 + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltable + TableScan + alias: smalltable + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + src1 + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-10 is a root stage + Stage-7 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltable + Fetch Operator + limit: -1 + smalltable2 + Fetch Operator + limit: -1 + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltable + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition 
expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + smalltable2 + TableScan + alias: smalltable2 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + src1 + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### 
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8 diff --git ql/src/test/results/clientpositive/spark/auto_join_stats.q.out ql/src/test/results/clientpositive/spark/auto_join_stats.q.out new file mode 100644 index 0000000..14c58fb --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join_stats.q.out @@ -0,0 +1,347 @@ +PREHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable). +-- This query plan should thus not try to combine the mapjoin into a single work. + +create table smalltable(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable +POSTHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable). +-- This query plan should thus not try to combine the mapjoin into a single work. + +create table smalltable(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable +POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string) + Local Work: + Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col5} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +PREHOOK: query: create table smalltable2(key string, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smalltable2 +POSTHOOK: query: load data local inpath 
'../../data/files/T1.txt' into table smalltable2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smalltable2 +PREHOOK: query: analyze table smalltable compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Output: default@smalltable +POSTHOOK: query: analyze table smalltable compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Output: default@smalltable +PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: smalltable2 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string) + Local Work: + Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value 
expressions: key (type: string) + Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col5} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + input vertices: + 1 Map 5 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable +PREHOOK: Input: default@smalltable2 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable +POSTHOOK: Input: default@smalltable2 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4 8 +4 4 8 +4 4 8 +4 4 8 diff --git ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out new file mode 100644 index 0000000..2fc1186 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out @@ -0,0 +1,327 @@ +PREHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2. +-- Hence forcing the third table to be smaller. 
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {_col0} {_col5}
+                        1 {key}
+                      keys:
+                        0 (_col0 + _col5) (type: double)
+                        1 UDFToDouble(key) (type: double)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 {key}
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col5
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: (_col0 + _col5) is not null (type: boolean)
+                        Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          condition expressions:
+                            0 {_col0} {_col5}
+                            1 {key}
+                          keys:
+                            0 (_col0 + _col5) (type: double)
+                            1 UDFToDouble(key) (type: double)
+                          outputColumnNames: _col0, _col5, _col10
+                          input vertices:
+                            1 Map 3
+                          Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                            outputColumnNames: _col0, _col1, _col2
+                            Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                            File Output Operator
+                              compressed: false
+                              Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                              table:
+                                  input format: org.apache.hadoop.mapred.TextInputFormat
+                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {_col0} {_col5}
+                        1 {key}
+                      keys:
+                        0 (_col0 + _col5) (type: double)
+                        1 UDFToDouble(key) (type: double)
+            Local Work:
+              Map Reduce Local Work
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: smalltable2
+                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: UDFToDouble(key) is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {_col0} {_col5} {_col10}
+                        1 
+                      keys:
+                        0 (_col0 + _col5) (type: double)
+                        1 UDFToDouble(key) (type: double)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 {key}
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col5
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: (_col0 + _col5) is not null (type: boolean)
+                        Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          condition expressions:
+                            0 {_col0} {_col5}
+                            1 {key}
+                          keys:
+                            0 (_col0 + _col5) (type: double)
+                            1 UDFToDouble(key) (type: double)
+                          outputColumnNames: _col0, _col5, _col10
+                          input vertices:
+                            1 Map 3
+                          Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+                          Filter Operator
+                            predicate: (_col0 + _col5) is not null (type: boolean)
+                            Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+                            Map Join Operator
+                              condition map:
+                                   Inner Join 0 to 1
+                              condition expressions:
+                                0 {_col0} {_col5} {_col10}
+                                1 
+                              keys:
+                                0 (_col0 + _col5) (type: double)
+                                1 UDFToDouble(key) (type: double)
+                              outputColumnNames: _col0, _col5, _col10
+                              input vertices:
+                                1 Map 4
+                              Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                              Select Operator
+                                expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+                                outputColumnNames: _col0, _col1, _col2
+                                Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                                File Output Operator
+                                  compressed: false
+                                  Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+                                  table:
+                                      input format: org.apache.hadoop.mapred.TextInputFormat
+                                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4	4	8
+4	4	8
+4	4	8
+4	4	8