diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 0d5ef33b8d..775dc2125e 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -376,7 +376,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vectorized_string_funcs.q,\ vectorized_timestamp.q,\ vectorized_timestamp_funcs.q,\ - vectorized_timestamp_ints_casts.q + vectorized_timestamp_ints_casts.q,\ + mapjoin_hook.q minillap.query.files=acid_bucket_pruning.q,\ add_part_with_loc.q,\ diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/MapJoinCounterHook.java itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/MapJoinCounterHook.java index 43cb8c9615..161ed3786e 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/MapJoinCounterHook.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/hooks/MapJoinCounterHook.java @@ -17,12 +17,21 @@ */ package org.apache.hadoop.hive.ql.hooks; +import java.io.Serializable; import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskRunner; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; @@ -36,8 +45,9 @@ public void run(HookContext hookContext) { } QueryPlan plan = hookContext.getQueryPlan(); - String queryID = plan.getQueryId(); - // String query = SessionState.get().getCmd(); + LogHelper console = SessionState.getConsole(); + String engine = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + final String MR = "mr", TEZ = "tez", SPARK = "spark"; int commonJoin = 0; int hintedMapJoin = 0; @@ -45,36 +55,76 @@ public void run(HookContext hookContext) { int hintedMapJoinLocal = 0; int convertedMapJoinLocal = 0; int backupCommonJoin = 0; + int bucketMapJoin = 0; + int hybridHashJoin = 0; + int dynamicPartitionHashJoin = 0; - List list = hookContext.getCompleteTaskList(); - for (TaskRunner tskRunner : list) { - Task tsk = tskRunner.getTask(); - int tag = tsk.getTaskTag(); - switch (tag) { - case Task.COMMON_JOIN: - commonJoin++; - break; - case Task.HINTED_MAPJOIN: - hintedMapJoin++; - break; - case Task.HINTED_MAPJOIN_LOCAL: - hintedMapJoinLocal++; - break; - case Task.CONVERTED_MAPJOIN: - convertedMapJoin++; + switch (engine) { + case MR: + List list = hookContext.getCompleteTaskList(); + for (TaskRunner tskRunner : list) { + Task tsk = tskRunner.getTask(); + int tag = tsk.getTaskTag(); + switch (tag) { + case Task.COMMON_JOIN: + commonJoin++; + break; + case Task.HINTED_MAPJOIN: + hintedMapJoin++; + break; + case Task.HINTED_MAPJOIN_LOCAL: + hintedMapJoinLocal++; + break; + case Task.CONVERTED_MAPJOIN: + convertedMapJoin++; + break; + case Task.CONVERTED_MAPJOIN_LOCAL: + convertedMapJoinLocal++; + break; + case Task.BACKUP_COMMON_JOIN: + backupCommonJoin++; + break; + } + } break; - case Task.CONVERTED_MAPJOIN_LOCAL: - convertedMapJoinLocal++; + + case TEZ: + for(Task tezTask: plan.getRootTasks()) { + TezWork work = (TezWork) tezTask.getWork(); + Map workGraph = work.getWorkMap(); + for(Map.Entry baseWorkEntry : workGraph.entrySet()) { + Set> operatorSet = baseWorkEntry.getValue().getAllOperators(); + for(Operator operator : operatorSet) { + if(operator instanceof MapJoinOperator) { + MapJoinDesc mapJoinDesc = (MapJoinDesc) operator.getConf(); + if (mapJoinDesc.isBucketMapJoin()) { + bucketMapJoin++; + } + if (mapJoinDesc.isHybridHashJoin()) { + hybridHashJoin++; + } + if (mapJoinDesc.isDynamicPartitionHashJoin()) { + dynamicPartitionHashJoin++; + } + if (mapJoinDesc.isMapSideJoin()) { + convertedMapJoin++; + } + } else if(operator instanceof CommonMergeJoinOperator) { + commonJoin++; + } + } + } + } break; - case Task.BACKUP_COMMON_JOIN: - backupCommonJoin++; + + case SPARK: + //todo break; - } } - LogHelper console = SessionState.getConsole(); console.printError("[MapJoinCounter PostHook] COMMON_JOIN: " + commonJoin - + " HINTED_MAPJOIN: " + hintedMapJoin + " HINTED_MAPJOIN_LOCAL: " + hintedMapJoinLocal - + " CONVERTED_MAPJOIN: " + convertedMapJoin + " CONVERTED_MAPJOIN_LOCAL: " + convertedMapJoinLocal - + " BACKUP_COMMON_JOIN: " + backupCommonJoin); + + " HINTED_MAPJOIN: " + hintedMapJoin + " HINTED_MAPJOIN_LOCAL: " + hintedMapJoinLocal + + " CONVERTED_MAPJOIN: " + convertedMapJoin + " CONVERTED_MAPJOIN_LOCAL: " + convertedMapJoinLocal + + " BACKUP_COMMON_JOIN: " + backupCommonJoin +" BUCKET_MAP_JOIN: " + bucketMapJoin + + " HYBRID_HASH_JOIN: " + hybridHashJoin + " DYNAMIC_PARTITION_HASH_JOIN: " + dynamicPartitionHashJoin ); } } diff --git ql/src/test/queries/clientpositive/mapjoin_hook.q ql/src/test/queries/clientpositive/mapjoin_hook.q index c929356764..cf70d39568 100644 --- ql/src/test/queries/clientpositive/mapjoin_hook.q +++ ql/src/test/queries/clientpositive/mapjoin_hook.q @@ -30,6 +30,7 @@ where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest1_n171 SELECT src1.key, src3.value; +set hive.auto.convert.join = false; diff --git ql/src/test/results/clientpositive/llap/mapjoin_hook.q.out ql/src/test/results/clientpositive/llap/mapjoin_hook.q.out new file mode 100644 index 0000000000..b12ecfd0b5 --- /dev/null +++ ql/src/test/results/clientpositive/llap/mapjoin_hook.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: drop table dest1_n171 +PREHOOK: type: DROPTABLE +RUN: Stage-0:DDL +PREHOOK: query: CREATE TABLE dest1_n171(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1_n171 +RUN: Stage-0:DDL +PREHOOK: query: INSERT OVERWRITE TABLE dest1_n171 +SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1_n171 +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 1 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 BUCKET_MAP_JOIN: 0 HYBRID_HASH_JOIN: 0 DYNAMIC_PARTITION_HASH_JOIN: 0 +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1_n171 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n171 +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 2 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 BUCKET_MAP_JOIN: 0 HYBRID_HASH_JOIN: 0 DYNAMIC_PARTITION_HASH_JOIN: 0 +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1_n171 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest1_n171 +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 1 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 BUCKET_MAP_JOIN: 0 HYBRID_HASH_JOIN: 0 DYNAMIC_PARTITION_HASH_JOIN: 0 +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest1_n171 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1_n171 +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 2 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 BUCKET_MAP_JOIN: 0 HYBRID_HASH_JOIN: 0 DYNAMIC_PARTITION_HASH_JOIN: 0 +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS diff --git ql/src/test/results/clientpositive/mapjoin_hook.q.out ql/src/test/results/clientpositive/mapjoin_hook.q.out index 2e02814edf..db5472cb85 100644 --- ql/src/test/results/clientpositive/mapjoin_hook.q.out +++ ql/src/test/results/clientpositive/mapjoin_hook.q.out @@ -12,7 +12,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest1_n171 -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 BUCKET_MAP_JOIN: 0 HYBRID_HASH_JOIN: 0 DYNAMIC_PARTITION_HASH_JOIN: 0 RUN: Stage-6:MAPREDLOCAL RUN: Stage-2:MAPRED RUN: Stage-0:MOVE @@ -22,7 +22,7 @@ INSERT OVERWRITE TABLE dest1_n171 SELECT src1.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1_n171 -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 BUCKET_MAP_JOIN: 0 HYBRID_HASH_JOIN: 0 DYNAMIC_PARTITION_HASH_JOIN: 0 RUN: Stage-8:MAPREDLOCAL RUN: Stage-6:MAPRED RUN: Stage-0:MOVE @@ -41,7 +41,7 @@ PREHOOK: Output: default@dest1_n171 Hive Runtime Error: Map local work exhausted memory FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 1 +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 1 BUCKET_MAP_JOIN: 0 HYBRID_HASH_JOIN: 0 DYNAMIC_PARTITION_HASH_JOIN: 0 RUN: Stage-6:CONDITIONAL RUN: Stage-7:MAPREDLOCAL RUN: Stage-1:MAPRED @@ -58,7 +58,7 @@ ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask Hive Runtime Error: Map local work exhausted memory FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 2 +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 2 BUCKET_MAP_JOIN: 0 HYBRID_HASH_JOIN: 0 DYNAMIC_PARTITION_HASH_JOIN: 0 RUN: Stage-11:CONDITIONAL RUN: Stage-14:MAPREDLOCAL RUN: Stage-1:MAPRED