diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 1672453..f68a6e3 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -414,7 +414,12 @@ true), HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000), HIVEJOINCACHESIZE("hive.join.cache.size", 25000), + + // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.rows, + // need to remove by hive 0.13. Also, do not change default (see SMB operator) HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100), + + HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000), HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000), HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5), HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3), diff --git conf/hive-default.xml.template conf/hive-default.xml.template index 3a7d1dc..10144c7 100644 --- conf/hive-default.xml.template +++ conf/hive-default.xml.template @@ -589,9 +589,9 @@ - hive.mapjoin.bucket.cache.size - 100 - How many values in each keys in the map-joined table should be cached in memory. + hive.smbjoin.cache.rows + 10000 + How many rows with the same key value should be cached in memory per smb joined table. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java index 7ce221c..1a784b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java @@ -123,8 +123,18 @@ protected void initializeOp(Configuration hconf) throws HiveException { fetchDone = new boolean[maxAlias]; foundNextKeyGroup = new boolean[maxAlias]; - int bucketSize = HiveConf.getIntVar(hconf, - HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE); + int bucketSize; + + // For backwards compatibility reasons we honor the older + // HIVEMAPJOINBUCKETCACHESIZE if it is set to a non-default value. + // By hive 0.13 we should remove this code. + int oldVar = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE); + if (oldVar != 100) { + bucketSize = oldVar; + } else { + bucketSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESMBJOINCACHEROWS); + } + for (byte pos = 0; pos < order.length; pos++) { RowContainer rc = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors[pos],