diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 443bc3b..2ebf8a8 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -474,6 +474,7 @@
     HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", false),
     HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size",
         10000000L),
+    HIVEOPTIMIZEMAPJOINFOLLOWEDBYMR("hive.optimize.mapjoin.mapreduce", false),
     HIVESKEWJOINKEY("hive.skewjoin.key", 100000),
     HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000),
     HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L), //32M
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index d1977a9..6656b0e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -32,6 +32,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
@@ -759,6 +760,41 @@ public static void setKeyAndValueDesc(MapredWork plan,
   }
 
   /**
+   * Set the key and value description for all the tasks rooted at the given
+   * task. Loops over all the tasks recursively.
+   *
+   * @param task
+   */
+  public static void setKeyAndValueDescForTaskTree(Task<? extends Serializable> task) {
+
+    if (task instanceof ConditionalTask) {
+      List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task)
+          .getListTasks();
+      for (Task<? extends Serializable> tsk : listTasks) {
+        setKeyAndValueDescForTaskTree(tsk);
+      }
+    } else if (task.getWork() instanceof MapredWork) {
+      MapredWork work = (MapredWork) task.getWork();
+      work.deriveExplainAttributes();
+      HashMap<String, Operator<? extends Serializable>> opMap = work
+          .getAliasToWork();
+      if (!opMap.isEmpty()) {
+        for (Operator<? extends Serializable> op : opMap.values()) {
+          setKeyAndValueDesc(work, op);
+        }
+      }
+    }
+
+    if (task.getChildTasks() == null) {
+      return;
+    }
+
+    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
+      setKeyAndValueDescForTaskTree(childTask);
+    }
+  }
+
+  /**
    * create a new plan and return.
    *
    * @return the new plan
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
index 301d5fc..ba96bb3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
@@ -107,9 +107,11 @@ public MapJoinProcessor() {
   }
 
   /**
-   * Generate the MapRed Local Work
+   * Generate the MapRed Local Work for the given map-join operator
+   *
    * @param newWork
    * @param mapJoinOp
+   *          map-join operator for which local work needs to be generated.
   * @param bigTablePos
   * @return
   * @throws SemanticException
@@ -219,6 +221,16 @@ private static String genMapJoinLocalWork(MapredWork newWork, MapJoinOperator ma
     return bigTableAlias;
   }
 
+  /**
+   * Convert the join to a map-join and also generate any local work needed.
+   *
+   * @param newWork MapredWork in which the conversion is to happen
+   * @param op
+   *          The join operator that needs to be converted to a map-join
+   * @param mapJoinPos
+   * @return the alias to the big table
+   * @throws SemanticException
+   */
   public static String genMapJoinOpAndLocalWork(MapredWork newWork, JoinOperator op, int mapJoinPos)
       throws SemanticException {
     try {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java
index c9a2015..300f56d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java
@@ -45,6 +45,7 @@
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
 import org.apache.hadoop.hive.ql.lib.TaskGraphWalker.TaskGraphWalkerContext;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.QBJoinTree;
@@ -145,7 +146,7 @@ private int getPosition(MapredWork work, Operator<? extends Serializable> joinOp
    * A task and its child task has been converted from join to mapjoin.
    * See if the two tasks can be merged.
    */
-  private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task) {
+  private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task, Configuration conf) {
     MapRedTask childTask = (MapRedTask)task.getChildTasks().get(0);
     MapredWork work = task.getWork();
     MapredLocalWork localWork = work.getMapLocalWork();
@@ -231,6 +232,34 @@ private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task) {
         oldChildTask.getParentTasks().add(task);
       }
     }
+
+    boolean convertToSingleJob = HiveConf.getBoolVar(conf,
+        HiveConf.ConfVars.HIVEOPTIMIZEMAPJOINFOLLOWEDBYMR);
+    if (convertToSingleJob) {
+      mayBeCopyReducerConf(task, childTask);
+    }
+  }
+
+  /**
+   * Copy the reducer configuration if the child task also has a reducer.
+   * @param task
+   * @param childTask
+   */
+  private void mayBeCopyReducerConf(MapRedTask task, MapRedTask childTask) {
+    MapredWork childWork = childTask.getWork();
+    Operator<? extends Serializable> childReducer = childWork.getReducer();
+    MapredWork work = task.getWork();
+    if (childReducer == null) {
+      return;
+    }
+    work.setReducer(childReducer);
+    work.setNumReduceTasks(childWork.getNumReduceTasks());
+    work.setJoinTree(childWork.getJoinTree());
+    work.setNeedsTagging(childWork.getNeedsTagging());
+
+    // Make sure the key configuration is correct: clear it and regenerate it.
+    work.getTagToValueDesc().clear();
+    GenMapRedUtils.setKeyAndValueDescForTaskTree(task);
   }
 
   // create map join task and set big table as bigTablePosition
@@ -250,6 +279,120 @@ private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task) {
     return new ObjectPair<MapRedTask, String>(newTask, bigTableAlias);
   }
 
+  /*
+   * A join task has been converted to a map-join task and is followed by a
+   * map-reduce task. See if the two tasks can be merged into a single task.
+   */
+  private void mayBeMergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configuration conf) {
+    if (mapJoinTask.getChildTasks() == null) {
+      // Nothing downstream to merge
+      return;
+    }
+    MapRedTask childTask = (MapRedTask)mapJoinTask.getChildTasks().get(0);
+    MapredWork mapJoinWork = mapJoinTask.getWork();
+    MapredWork childWork = childTask.getWork();
+    Operator<? extends Serializable> childReducer = childWork.getReducer();
+    if (childReducer == null) {
+      // The child task is map-only, nothing to merge.
+      return;
+    }
+
+    // Can this be merged?
+    Map<String, Operator<? extends Serializable>> aliasToWork = mapJoinWork.getAliasToWork();
+    if (aliasToWork.size() > 1) {
+      return;
+    }
+    Map<String, ArrayList<String>> childPathToAliases = childWork.getPathToAliases();
+    if (childPathToAliases.size() > 1) {
+      return;
+    }
+
+    // Locate the leaf operator of the map-join task. Start by initializing the
+    // leaf operator to the root operator.
+    Operator<? extends Serializable> mapJoinLeafOperator = aliasToWork.values().iterator().next();
+    while (mapJoinLeafOperator.getChildOperators() != null) {
+      // Don't perform this optimization for multi-table inserts
+      if (mapJoinLeafOperator.getChildOperators().size() > 1) {
+        return;
+      }
+      mapJoinLeafOperator = mapJoinLeafOperator.getChildOperators().get(0);
+    }
+
+    if (!(mapJoinLeafOperator instanceof FileSinkOperator)) {
+      // Sanity check, shouldn't happen.
+      return;
+    }
+
+    FileSinkOperator mapJoinTaskFileSinkOperator = (FileSinkOperator) mapJoinLeafOperator;
+
+    // Bail out if the file sink writes to a directory other than the one the child task reads
+    String workDir = mapJoinTaskFileSinkOperator.getConf().getDirName();
+    if (!childPathToAliases.keySet().iterator().next().equals(workDir)) {
+      return;
+    }
+
+    MapredLocalWork mapJoinLocalWork = mapJoinWork.getMapLocalWork();
+    MapredLocalWork childLocalWork = childWork.getMapLocalWork();
+
+    // Neither task should use a bucketed map-join
+    if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) ||
+        (childLocalWork != null && childLocalWork.getBucketMapjoinContext() != null)) {
+      return;
+    }
+
+    if (childWork.getAliasToWork().size() > 1) {
+      return;
+    }
+
+    Operator<? extends Serializable> childAliasOp =
+        childWork.getAliasToWork().values().iterator().next();
+    if (mapJoinTaskFileSinkOperator.getParentOperators().size() > 1) {
+      return;
+    }
+
+    // Merge the two operator trees: remove the FileSinkOperator from the first tree and
+    // attach the root of the second tree in its place
+    Operator<? extends Serializable> parentFOp = mapJoinTaskFileSinkOperator
+        .getParentOperators().get(0);
+    parentFOp.getChildOperators().remove(mapJoinTaskFileSinkOperator);
+    parentFOp.getChildOperators().add(childAliasOp);
+    List<Operator<? extends Serializable>> parentOps =
+        new ArrayList<Operator<? extends Serializable>>();
+    parentOps.add(parentFOp);
+    childAliasOp.setParentOperators(parentOps);
+
+    mapJoinWork.getAliasToPartnInfo().putAll(childWork.getAliasToPartnInfo());
+    for (Map.Entry<String, PartitionDesc> childWorkEntry :
+        childWork.getPathToPartitionInfo().entrySet()) {
+      if (childWork.getAliasToPartnInfo().containsValue(childWorkEntry.getKey())) {
+        mapJoinWork.getPathToPartitionInfo().put(childWorkEntry.getKey(), childWorkEntry.getValue());
+      }
+    }
+
+    // Fill in the local work of the merged task
+    if (mapJoinLocalWork != null && childLocalWork != null) {
+      mapJoinLocalWork.getAliasToFetchWork().putAll(childLocalWork.getAliasToFetchWork());
+      mapJoinLocalWork.getAliasToWork().putAll(childLocalWork.getAliasToWork());
+    }
+
+    // Remove the child task; its children become children of the map-join task
+    List<Task<? extends Serializable>> oldChildTasks = childTask.getChildTasks();
+    mapJoinTask.setChildTasks(oldChildTasks);
+    if (oldChildTasks != null) {
+      for (Task<? extends Serializable> oldChildTask : oldChildTasks) {
+        oldChildTask.getParentTasks().remove(childTask);
+        oldChildTask.getParentTasks().add(mapJoinTask);
+      }
+    }
+
+    // Copy the reducer configuration if needed.
+    boolean convertToSingleJob = HiveConf.getBoolVar(conf,
+        HiveConf.ConfVars.HIVEOPTIMIZEMAPJOINFOLLOWEDBYMR);
+    if (convertToSingleJob) {
+      mayBeCopyReducerConf(mapJoinTask, childTask);
+    }
+  }
+
   private Task<? extends Serializable> processCurrentTask(MapRedTask currTask,
       ConditionalTask conditionalTask, Context context)
       throws SemanticException {
@@ -376,11 +519,21 @@ private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task) {
 
       // Can this task be merged with the child task. This can happen if a big table is being
       // joined with multiple small tables on different keys
-      // Further optimizations are possible here, a join which has been converted to a mapjoin
-      // followed by a mapjoin can be performed in a single MR job.
-      if ((newTask.getChildTasks() != null) && (newTask.getChildTasks().size() == 1)
-          && (newTask.getChildTasks().get(0).getTaskTag() == Task.MAPJOIN_ONLY_NOBACKUP)) {
-        mergeMapJoinTaskWithChildMapJoinTask(newTask);
+      if ((newTask.getChildTasks() != null) && (newTask.getChildTasks().size() == 1)) {
+        if (newTask.getChildTasks().get(0).getTaskTag() == Task.MAPJOIN_ONLY_NOBACKUP) {
+          // Merging two map-join tasks
+          mergeMapJoinTaskWithChildMapJoinTask(newTask, conf);
+        }
+
+        // The join operator has been converted into a map-join. Now see if it
+        // can be merged into the following map-reduce job.
+        boolean convertToSingleJob = HiveConf.getBoolVar(conf,
+            HiveConf.ConfVars.HIVEOPTIMIZEMAPJOINFOLLOWEDBYMR);
+        if (convertToSingleJob) {
+          // Try merging the map-join task with the following map-reduce job
+          // into a single job.
+          mayBeMergeMapJoinTaskWithMapReduceTask(newTask, conf);
+        }
       }
 
       return newTask;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d0b1462..7021663 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8122,7 +8122,7 @@ private void genMapRedTasks(ParseContext pCtx) throws SemanticException {
 
     // For each task, set the key descriptor for the reducer
     for (Task<? extends Serializable> rootTask : rootTasks) {
-      setKeyDescTaskTree(rootTask);
+      GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
     }
 
     // If a task contains an operator which instructs bucketizedhiveinputformat
@@ -8348,36 +8348,6 @@ private void setInputFormat(Task<? extends Serializable> task) {
     }
   }
 
-  // loop over all the tasks recursviely
-  private void setKeyDescTaskTree(Task<? extends Serializable> task) {
-
-    if (task instanceof ExecDriver) {
-      MapredWork work = (MapredWork) task.getWork();
-      work.deriveExplainAttributes();
-      HashMap<String, Operator<? extends Serializable>> opMap = work
-          .getAliasToWork();
-      if (!opMap.isEmpty()) {
-        for (Operator<? extends Serializable> op : opMap.values()) {
-          GenMapRedUtils.setKeyAndValueDesc(work, op);
-        }
-      }
-    } else if (task instanceof ConditionalTask) {
-      List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task)
-          .getListTasks();
-      for (Task<? extends Serializable> tsk : listTasks) {
-        setKeyDescTaskTree(tsk);
-      }
-    }
-
-    if (task.getChildTasks() == null) {
-      return;
-    }
-
-    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
-      setKeyDescTaskTree(childTask);
-    }
-  }
-
   @SuppressWarnings("nls")
   public Phase1Ctx initPhase1Ctx() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
index ebee133..d121f7e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
@@ -239,6 +239,12 @@ public TableDesc getKeyDesc() {
     return keyDesc;
   }
 
+  /**
+   * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
+   * to keySerializeInfo of the ReduceSink
+   *
+   * @param keyDesc
+   */
   public void setKeyDesc(final TableDesc keyDesc) {
     this.keyDesc = keyDesc;
   }
diff --git ql/src/test/queries/clientpositive/multiMapJoin1.q ql/src/test/queries/clientpositive/multiMapJoin1.q
index 2dd4c94..ca0ddd2 100644
--- ql/src/test/queries/clientpositive/multiMapJoin1.q
+++ ql/src/test/queries/clientpositive/multiMapJoin1.q
@@ -69,6 +69,27 @@ select count(*) FROM
 JOIN
 smallTbl2 on (firstjoin.value1 = smallTbl2.value);
 
+set hive.optimize.mapjoin.mapreduce=true;
+explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+group by smallTbl2.key;
+
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+group by smallTbl2.key;
+set hive.optimize.mapjoin.mapreduce=false;
+
 create table smallTbl3(key string, value string);
 insert overwrite table smallTbl3 select * from src where key < 10;
diff --git ql/src/test/results/clientpositive/multiMapJoin1.q.out ql/src/test/results/clientpositive/multiMapJoin1.q.out
index 6bb2ca8..dac4dc0 100644
--- ql/src/test/results/clientpositive/multiMapJoin1.q.out
+++ ql/src/test/results/clientpositive/multiMapJoin1.q.out
@@ -704,6 +704,204 @@ POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type
 POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 580
+PREHOOK: query: explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+group by smallTbl2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+group by smallTbl2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL smallTbl2) key))))
+
+STAGE DEPENDENCIES:
+  Stage-7 is a root stage
+  Stage-6 depends on stages: Stage-7
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-7
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        firstjoin:smalltbl1
+          Fetch Operator
+            limit: -1
+        smalltbl2
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        firstjoin:smalltbl1
+          TableScan
+            alias: smalltbl1
+            HashTable Sink Operator
+              condition expressions:
+                0 {value}
+                1
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 0
+        smalltbl2
+          TableScan
+            alias: smalltbl2
+            HashTable Sink Operator
+              condition expressions:
+                0
+                1 {key}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col1]]
+                1 [Column[value]]
+              Position of Big Table: 0
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        firstjoin:bigtbl
+          TableScan
+            alias: bigtbl
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {value}
+                1
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col1
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col1
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0
+                    1 {key}
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[_col1]]
+                    1 [Column[value]]
+                  outputColumnNames: _col3
+                  Position of Big Table: 0
+                  Select Operator
+                    expressions:
+                          expr: _col3
+                          type: string
+                    outputColumnNames: _col3
+                    Group By Operator
+                      aggregations:
+                            expr: count()
+                      bucketGroup: false
+                      keys:
+                            expr: _col3
+                            type: string
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Reduce Output Operator
+                        key expressions:
+                              expr: _col0
+                              type: string
+                        sort order: +
+                        Map-reduce partition columns:
+                              expr: _col0
+                              type: string
+                        tag: -1
+                        value expressions:
+                              expr: _col1
+                              type: bigint
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+group by smallTbl2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bigtbl
+PREHOOK: Input: default@smalltbl1
+PREHOOK: Input: default@smalltbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+group by smallTbl2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bigtbl
+POSTHOOK: Input: default@smalltbl1
+POSTHOOK: Input: default@smalltbl2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+270
+10
+10
+270
+10
+10
 PREHOOK: query: create table smallTbl3(key string, value string)
 PREHOOK: type: CREATETABLE
 POSTHOOK: query: create table smallTbl3(key string, value string)