diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f038d04..2129331 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -446,6 +446,8 @@ public class HiveConf extends Configuration { HIVEJOBPROGRESS("hive.task.progress", false), HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"), + // optional input format override for SMB join work, e.g. org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat + HIVEINPUTFORMATSMBJOIN("hive.input.format.smbjoin", null), HIVEENFORCEBUCKETING("hive.enforce.bucketing", false), HIVEENFORCESORTING("hive.enforce.sorting", false), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java index b731a9f..1d9e9ce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java @@ -317,6 +317,10 @@ public class ExecDriver extends Task implements Serializable, Hadoop inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); } + if (getWork().isSmbJoin()) { + inpFormat = HiveConf.getVar(job, ConfVars.HIVEINPUTFORMATSMBJOIN, inpFormat); + } + LOG.info("Using " + inpFormat); try { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 4abbd1f..e188cb8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -280,6 +280,7 @@ public final class GenMapRedUtils { localPlan.setInputFileChangeSensitive(true); bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); + plan.setSmbJoin(currMapJoinOp instanceof SMBMapJoinOperator); } } } diff --git 
ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index 2c2a3ec..f0f9466 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -90,6 +90,8 @@ public class MapredWork implements Serializable { // used to indicate the input is sorted, and so a BinarySearchRecordReader shoudl be used private boolean inputFormatSorted = false; + private transient boolean smbJoin; /* set from GenMapRedUtils (currMapJoinOp instanceof SMBMapJoinOperator), read by ExecDriver to apply hive.input.format.smbjoin; NOTE(review): transient - confirm the flag is not needed after the plan is serialized */ + public MapredWork() { aliasToPartnInfo = new LinkedHashMap(); } @@ -486,4 +488,11 @@ public class MapredWork implements Serializable { return returnList; } + public boolean isSmbJoin() { + return smbJoin; + } + + public void setSmbJoin(boolean smbJoin) { + this.smbJoin = smbJoin; + } }