diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 92a1c31..5a8870a 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2023,12 +2023,29 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "Whether to enable dynamically partitioned hash join optimization. \n" + "This setting is also dependent on enabling hive.auto.convert.join"), HIVECONVERTJOIN("hive.auto.convert.join", true, - "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size"), + "Whether Hive enables the optimization for automatically determining the join algorithm based on data size"), + HIVE_MAP_JOIN_CONVERSION_FACTOR("hive.map.join.conversion.factor", 0.3f, + new RangeValidator(0.0f, 1.0f), + "Fraction of executor memory to use when making the map join conversion decision. By default, if the data size\n" + + "is less than 30% of the executor memory, a map join will be chosen over a shuffle join. Setting this value to <= 0.0f\n" + + "will disable map join conversions"), + HIVE_DYNAMIC_PARTITIONED_JOIN_CONVERSION_FACTOR("hive.auto.convert.join.shuffle.factor", 10.0f, + "If hive.auto.convert.join is false, this parameter does not take effect. \n" + + "However, if it is true, and the predicted size of the larger input for a given join is less than \n" + + "this factor times the executor memory size, the join will be converted to a dynamically partitioned hash join.\n" + + "The value \"-1\" means no limit."), + /** + * @deprecated Use HIVE_MAP_JOIN_CONVERSION_FACTOR instead + */ + @Deprecated HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true, "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size. \n" + "If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the\n" + "specified size, the join is directly converted to a mapjoin (there is no conditional task)."), - + /** + * @deprecated Use HIVE_MAP_JOIN_CONVERSION_FACTOR instead + */ + @Deprecated HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size", 10000000L, "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" + @@ -2069,6 +2086,10 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "The value \"-1\" means no limit."), XPRODSMALLTABLEROWSTHRESHOLD("hive.xprod.mapjoin.small.table.rows", 1,"Maximum number of rows on build side" + " of map join before it switches over to cross product edge"), + /** + * @deprecated Use HIVE_DYNAMIC_PARTITIONED_JOIN_CONVERSION_FACTOR instead + */ + @Deprecated HIVECONVERTJOINMAXSHUFFLESIZE("hive.auto.convert.join.shuffle.max.size", 10000000000L, "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. 
\n" + "However, if it is on, and the predicted size of the larger input for a given join is greater \n" + diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml index 0daf9ad..a7c349f 100644 --- a/data/conf/hive-site.xml +++ b/data/conf/hive-site.xml @@ -349,4 +349,9 @@ false + + <property> + <name>hive.map.join.conversion.factor</name> + <value>0.01</value> + </property> diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml index 44ca6c9..3ab0ed8 100644 --- a/data/conf/llap/hive-site.xml +++ b/data/conf/llap/hive-site.xml @@ -358,4 +358,9 @@ 1024 + + <property> + <name>hive.map.join.conversion.factor</name> + <value>0.01</value> + </property> diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml index 236adc7..d0e6b92 100644 --- a/data/conf/tez/hive-site.xml +++ b/data/conf/tez/hive-site.xml @@ -298,4 +298,9 @@ false + + <property> + <name>hive.map.join.conversion.factor</name> + <value>0.01</value> + </property> diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java index 40dd992..29cb6d6 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java @@ -914,8 +914,7 @@ private void setupTez(HiveConf conf) { private void setupMapJoin(HiveConf conf) { conf.setBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN, true); - conf.setBoolVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASK, true); - conf.setLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD, 100000); + conf.setFloatVar(HiveConf.ConfVars.HIVE_MAP_JOIN_CONVERSION_FACTOR, 0.0001f); } private List<String> runStatementOnDriver(String stmt) throws Exception { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MemoryInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MemoryInfo.java new file mode 100644 index 0000000..54f1d0b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MemoryInfo.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile; +import org.apache.tez.mapreduce.hadoop.MRJobConfig; + +/** + * Contains information about executor memory and the memory thresholds used for join conversion decisions, + * which depend on the execution engine. 
+ **/ + +public class MemoryInfo { + + private Configuration conf; + private boolean isTez; + private boolean isLlap; + private long maxExecutorMemory; + private long mapJoinMemoryThreshold; + private long dynPartJoinMemoryThreshold; + + public MemoryInfo(Configuration conf) { + this.conf = conf; + this.isTez = "tez".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE)); + this.isLlap = "llap".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_MODE)); + if (isLlap) { + LlapClusterStateForCompile llapInfo = LlapClusterStateForCompile.getClusterInfo(conf); + llapInfo.initClusterInfo(); + if (llapInfo.hasClusterInfo()) { + this.maxExecutorMemory = llapInfo.getMemoryPerExecutor(); + } else { + long memPerInstance = + HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB) * 1024L * 1024L; + long numExecutors = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS); + this.maxExecutorMemory = memPerInstance / numExecutors; + } + } else { + if (isTez) { + float heapFraction = HiveConf.getFloatVar(conf, HiveConf.ConfVars.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION); + int containerSizeMb = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) > 0 ? + HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) : + conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB); + // this can happen when config is explicitly set to "-1", in which case defaultValue also does not work + if (containerSizeMb < 0) { + containerSizeMb = MRJobConfig.DEFAULT_MAP_MEMORY_MB; + } + this.maxExecutorMemory = (long) ((containerSizeMb * 1024L * 1024L) * heapFraction); + } else { + this.maxExecutorMemory = + conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB) * 1024L * 1024L; + // this can happen when config is explicitly set to "-1", in which case defaultValue also does not work + if (maxExecutorMemory < 0) { + maxExecutorMemory = MRJobConfig.DEFAULT_MAP_MEMORY_MB * 1024L * 1024L; + } + } + } + float mapJoinFraction = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_MAP_JOIN_CONVERSION_FACTOR); + this.mapJoinMemoryThreshold = (long) (mapJoinFraction * maxExecutorMemory); + float dynPartJoinFraction = + HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_DYNAMIC_PARTITIONED_JOIN_CONVERSION_FACTOR); + this.dynPartJoinMemoryThreshold = (long) (dynPartJoinFraction * maxExecutorMemory); + } + + public Configuration getConf() { + return conf; + } + + public void setConf(final Configuration conf) { + this.conf = conf; + } + + public boolean isTez() { + return isTez; + } + + public boolean isLlap() { + return isLlap; + } + + public long getMaxExecutorMemory() { + return maxExecutorMemory; + } + + public long getMapJoinMemoryThreshold() { + return mapJoinMemoryThreshold; + } + + public long getDynPartJoinMemoryThreshold() { + return dynPartJoinMemoryThreshold; + } + + @Override + public String toString() { + return "MEMORY INFO - { isTez: " + isTez() + + ", isLlap: " + isLlap() + + ", maxExecutorMemory: " + LlapUtil.humanReadableByteCount(getMaxExecutorMemory()) + + ", mapJoinMemoryThreshold: " + LlapUtil.humanReadableByteCount(getMapJoinMemoryThreshold()) + + ", dynPartJoinMemoryThreshold: " + LlapUtil.humanReadableByteCount(getDynPartJoinMemoryThreshold()) + + " }"; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java index a6b0dbc..da62071 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java @@ -625,7 +625,7 @@ private static void validateCapacity(long capacity) { throw new RuntimeException("Attempting to expand the hash table to " + capacity + " that overflows maximum array size. For this query, you may want to disable " + ConfVars.HIVEDYNAMICPARTITIONHASHJOIN.varname + " or reduce " - + ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD.varname); + + ConfVars.HIVE_MAP_JOIN_CONVERSION_FACTOR.varname); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 5437742..f7558cf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -649,7 +649,7 @@ public long spillPartition(int partitionId) throws IOException { int inMemRowCount = partition.hashMap.getNumValues(); if (inMemRowCount == 0) { LOG.warn("Trying to spill an empty hash partition! It may be due to " + - "hive.auto.convert.join.noconditionaltask.size being set too low."); + HiveConf.ConfVars.HIVE_MAP_JOIN_CONVERSION_FACTOR.varname + " being set too low."); } File file = FileUtils.createLocalDirsTempFile( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java index 151d1b3..14d4330 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java @@ -25,6 +25,7 @@ import java.util.Map; import org.apache.hadoop.hive.llap.LlapDaemonInfo; +import org.apache.hadoop.hive.ql.exec.MemoryInfo; import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError; import org.slf4j.Logger; @@ -99,8 +100,9 @@ public void load(MapJoinTableContainer[] mapJoinTables, long totalMapJoinMemory = desc.getMemoryNeeded(); LOG.info("Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable."); if (totalMapJoinMemory <= 0) { - totalMapJoinMemory = HiveConf.getLongVar( - hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + MemoryInfo memoryInfo = new MemoryInfo(hconf); + LOG.info("Memory info during hash table load: {}", memoryInfo); + totalMapJoinMemory = memoryInfo.getMapJoinMemoryThreshold(); } long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index b4cc76a..06f5f9c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.MemoryInfo; import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; import org.apache.hadoop.hive.ql.exec.MuxOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -402,7 +403,9 @@ private static long hashTableDataSizeAdjustment(long numRows, List0.0f, for a JoinOperator // 
that has both intermediate tables and query input tables as input tables, // we should be able to guess if this JoinOperator will be converted to a MapJoin - // based on hive.auto.convert.join.noconditionaltask.size. + // based on hive.map.join.conversion.factor. for (JoinOperator joinOp: pCtx.getJoinOps()) { boolean isAbleToGuess = true; boolean mayConvert = false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index 5269eb6..e4f27c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.optimizer.correlation; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVECONVERTJOIN; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASK; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -74,7 +73,7 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { // for auto convert map-joins, it not safe to dedup in here (todo) boolean mergeJoins = !pctx.getConf().getBoolVar(HIVECONVERTJOIN) && - !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK) && + !(pctx.getConf().getFloatVar(ConfVars.HIVE_MAP_JOIN_CONVERSION_FACTOR) > 0.0f) && !pctx.getConf().getBoolVar(ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ) && !pctx.getConf().getBoolVar(ConfVars.HIVEDYNAMICPARTITIONHASHJOIN); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java index a84534a..fb4d1da 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java @@ -41,8 +41,8 @@ * Note that the sizes of all the inputs may not be available at compile time. At runtime, it is * determined which branch we want to pick up from the above. * - * However, if hive.auto.convert.join.noconditionaltask is set to true, and - * the sum of any n-1 tables is smaller than hive.auto.convert.join.noconditionaltask.size, + * However, if hive.map.join.conversion.factor is > 0.0f, and + * the sum of any n-1 tables is smaller than hive.map.join.conversion.factor times the executor memory, * then a mapjoin is created instead of the conditional task. For the above, if the size of * T1 + T2 is less than the threshold, then the task is converted to a mapjoin task with T3 as * the big table. 
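To make the new sizing model concrete before the CommonJoinTaskDispatcher changes below: the patch replaces the absolute hive.auto.convert.join.noconditionaltask.size threshold with a value derived at compile time by MemoryInfo, i.e. the conversion factor times the executor heap. The following is a minimal standalone sketch of the Tez (non-LLAP) branch of that arithmetic; the container size and heap fraction are assumed example values, and only the 0.3f factor is the default introduced by this patch.

```java
// Standalone replay of the MemoryInfo threshold arithmetic for the Tez branch.
// The 4096 MB container and 0.8 heap fraction are assumed example values,
// not settings taken from this patch.
public class MapJoinThresholdSketch {
  public static void main(String[] args) {
    long containerSizeMb = 4096L;   // assumed hive.tez.container.size
    float heapFraction = 0.8f;      // assumed tez container max java heap fraction
    float conversionFactor = 0.3f;  // default hive.map.join.conversion.factor

    // mirrors MemoryInfo: executor heap = container bytes * heap fraction
    long maxExecutorMemory = (long) ((containerSizeMb * 1024L * 1024L) * heapFraction);
    // map join threshold = conversion factor * executor heap
    long mapJoinThreshold = (long) (conversionFactor * maxExecutorMemory);

    System.out.println("executor heap bytes = " + maxExecutorMemory); // ~3.2 GB
    System.out.println("map join threshold  = " + mapJoinThreshold);  // ~983 MB
  }
}
```

On LLAP the same threshold instead starts from daemon memory divided by executors per daemon; either way, callers such as HashTableLoader and MemoryDecider now read the result via getMapJoinMemoryThreshold() rather than a fixed byte count.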
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java index a52e5e6..f80b72d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; +import org.apache.hadoop.hive.ql.exec.MemoryInfo; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor; import org.apache.hadoop.hive.ql.parse.ParseContext; @@ -60,6 +62,8 @@ import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /* * Convert tasks involving JOIN into MAPJOIN. @@ -77,8 +81,8 @@ * Note that the sizes of all the inputs may not be available at compile time. At runtime, it is * determined which branch we want to pick up from the above. * - * However, if hive.auto.convert.join.noconditionaltask is set to true, and - * the sum of any n-1 tables is smaller than hive.auto.convert.join.noconditionaltask.size, + * However, if hive.map.join.conversion.factor is > 0.0f, and + * the sum of any n-1 tables is smaller than hive.map.join.conversion.factor times the executor memory, * then a mapjoin is created instead of the conditional task. For the above, if the size of * T1 + T2 is less than the threshold, then the task is converted to a mapjoin task with T3 as * the big table. @@ -106,7 +110,7 @@ * make current task depends on this new generated task */ public class CommonJoinTaskDispatcher extends AbstractJoinTaskDispatcher implements Dispatcher { - + private static final Logger LOG = LoggerFactory.getLogger(CommonJoinTaskDispatcher.class.getName()); HashMap<String, Long> aliasToSize = null; public CommonJoinTaskDispatcher(PhysicalContext context) { @@ -149,8 +153,8 @@ private long calculateLocalTableTotalSize(MapredLocalWork localWork) { private boolean isLocalTableTotalSizeUnderLimitAfterMerge( Configuration conf, MapredLocalWork... 
localWorks) { - final long localTableTotalSizeLimit = HiveConf.getLongVar(conf, - HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + MemoryInfo memoryInfo = new MemoryInfo(conf); + final long localTableTotalSizeLimit = memoryInfo.getMapJoinMemoryThreshold(); long localTableTotalSize = 0; for (int i = 0; i < localWorks.length; i++) { final long localWorkTableTotalSize = calculateLocalTableTotalSize(localWorks[i]); @@ -423,13 +427,14 @@ public static boolean cannotConvert(long aliasKnownSize, // If sizes of at least n-1 tables in a n-way join is known, and their sum is smaller than // the threshold size, convert the join into map-join and don't create a conditional task - boolean convertJoinMapJoin = HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASK); + boolean convertJoinMapJoin = HiveConf.getFloatVar(conf, + HiveConf.ConfVars.HIVE_MAP_JOIN_CONVERSION_FACTOR) > 0.0f; int bigTablePosition = -1; if (convertJoinMapJoin) { + MemoryInfo memoryInfo = new MemoryInfo(conf); + LOG.info(memoryInfo.toString()); // This is the threshold that the user has specified to fit in mapjoin - long mapJoinSize = HiveConf.getLongVar(conf, - HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + long mapJoinSize = memoryInfo.getMapJoinMemoryThreshold(); Long bigTableSize = null; Set aliases = aliasToWork.keySet(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapClusterStateForCompile.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapClusterStateForCompile.java index 0d1990a..a15bdd5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapClusterStateForCompile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapClusterStateForCompile.java @@ -44,6 +44,7 @@ private volatile Long lastClusterUpdateNs; private volatile Integer noConfigNodeCount, executorCount; private volatile int numExecutorsPerNode = -1; + private volatile long memoryPerInstance = -1; private LlapRegistryService svc; private final Configuration conf; private final long updateIntervalNs; @@ -93,6 +94,14 @@ public int getNumExecutorsPerNode() { return numExecutorsPerNode; } + public long getMemoryPerInstance() { + return memoryPerInstance; + } + + public long getMemoryPerExecutor() { + return getMemoryPerInstance() / getNumExecutorsPerNode(); + } + private boolean isUpdateNeeded() { Long lastUpdateLocal = lastClusterUpdateNs; if (lastUpdateLocal == null) return true; @@ -134,6 +143,9 @@ public boolean initClusterInfo() { if (numExecutorsPerNode == -1) { numExecutorsPerNode = numExecutors; } + if (memoryPerInstance == -1) { + memoryPerInstance = si.getResource().getMemorySize() * 1024L * 1024L; + } } catch (NumberFormatException e) { ++noConfigNodesLocal; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java index 64f1e7b..b2da625 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.MemoryInfo; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -74,7 +75,9 @@ public MemoryCalculator(PhysicalContext pctx) { this.pctx = pctx; - 
this.totalAvailableMemory = HiveConf.getLongVar(pctx.conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + MemoryInfo memoryInfo = new MemoryInfo(pctx.conf); + LOG.info("Memory info during memory decider: {}", memoryInfo); + this.totalAvailableMemory = memoryInfo.getMapJoinMemoryThreshold(); this.minimumHashTableSize = HiveConf.getIntVar(pctx.conf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS) * HiveConf.getIntVar(pctx.conf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE); this.inflationFactor = HiveConf.getFloatVar(pctx.conf, HiveConf.ConfVars.HIVE_HASH_TABLE_INFLATION_FACTOR); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index ad6db21..aee48f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -27,6 +27,7 @@ import com.google.common.collect.Sets; import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; +import org.apache.hadoop.hive.ql.exec.MemoryInfo; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TerminalOperator; @@ -191,7 +192,7 @@ private int convertJoinBucketMapJoin(JoinOperator joinOp, MapJoinOperator mapJoi MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds()); long maxSize = context.getConf().getLongVar( - HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); int bigTablePosition = -1; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 22f3266..cecfbaa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -127,6 +127,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.MemoryInfo; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.RowSchema; @@ -401,8 +402,9 @@ private static RelOptPlanner createPlanner( HiveConf conf, Set corrScalarRexSQWithAgg, Set scalarAggNoGbyNoWin) { final Double maxSplitSize = (double) HiveConf.getLongVar( conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE); - final Double maxMemory = (double) HiveConf.getLongVar( - conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + MemoryInfo memoryInfo = new MemoryInfo(conf); + STATIC_LOG.info("{}", memoryInfo); + final Double maxMemory = (double) memoryInfo.getMaxExecutorMemory(); HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory); HiveRulesRegistry registry = new HiveRulesRegistry(); Properties calciteConfigProperties = new Properties(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index fdc9635..2fafcf6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import 
org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.MemoryInfo; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; @@ -595,12 +596,13 @@ protected void generateTaskTree(List> rootTasks, Pa GraphWalker ogw = new GenTezWorkWalker(disp, procCtx); ogw.startWalking(topNodes, null); + MemoryInfo memoryInfo = new MemoryInfo(conf); + LOG.info("Memory info in tez compiler: {}", memoryInfo); // we need to specify the reserved memory for each work that contains Map Join for (List baseWorkList : procCtx.mapJoinWorkMap.values()) { for (BaseWork w : baseWorkList) { // work should be the smallest unit for memory allocation - w.setReservedMemoryMB( - (int)(conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024))); + w.setReservedMemoryMB((int)(memoryInfo.getMapJoinMemoryThreshold() / (1024 * 1024))); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java index c7cd4ad..9cb5a88 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java @@ -443,9 +443,9 @@ public void testFetchOperatorContext() throws Exception { @Test public void testNoConditionalTaskSizeForLlap() { ConvertJoinMapJoin convertJoinMapJoin = new ConvertJoinMapJoin(); - long defaultNoConditionalTaskSize = 1024L * 1024L * 1024L; HiveConf hiveConf = new HiveConf(); - hiveConf.setLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD, defaultNoConditionalTaskSize); + MemoryInfo memoryInfo = new MemoryInfo(hiveConf); + long defaultNoConditionalTaskSize = memoryInfo.getMapJoinMemoryThreshold(); LlapClusterStateForCompile llapInfo = null; if ("llap".equalsIgnoreCase(hiveConf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) { diff --git a/ql/src/test/queries/clientpositive/acid_mapjoin.q b/ql/src/test/queries/clientpositive/acid_mapjoin.q index e3d14cc..140dbb6 100644 --- a/ql/src/test/queries/clientpositive/acid_mapjoin.q +++ b/ql/src/test/queries/clientpositive/acid_mapjoin.q @@ -1,6 +1,5 @@ set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; diff --git a/ql/src/test/queries/clientpositive/autoColumnStats_1.q b/ql/src/test/queries/clientpositive/autoColumnStats_1.q index 9a62127..d79aa9a 100644 --- a/ql/src/test/queries/clientpositive/autoColumnStats_1.q +++ b/ql/src/test/queries/clientpositive/autoColumnStats_1.q @@ -9,8 +9,7 @@ set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.auto.convert.join=true; set hive.join.emit.interval=2; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; set hive.optimize.bucketingsorting=false; diff --git a/ql/src/test/queries/clientpositive/autoColumnStats_2.q b/ql/src/test/queries/clientpositive/autoColumnStats_2.q index 51f252b..36bba82 100644 --- a/ql/src/test/queries/clientpositive/autoColumnStats_2.q +++ 
b/ql/src/test/queries/clientpositive/autoColumnStats_2.q @@ -9,8 +9,7 @@ set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.auto.convert.join=true; set hive.join.emit.interval=2; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; set hive.optimize.bucketingsorting=false; diff --git a/ql/src/test/queries/clientpositive/autoColumnStats_3.q b/ql/src/test/queries/clientpositive/autoColumnStats_3.q index 7a41911..5703055 100644 --- a/ql/src/test/queries/clientpositive/autoColumnStats_3.q +++ b/ql/src/test/queries/clientpositive/autoColumnStats_3.q @@ -5,8 +5,7 @@ set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.auto.convert.join=true; set hive.join.emit.interval=2; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; set hive.optimize.bucketingsorting=false; diff --git a/ql/src/test/queries/clientpositive/auto_join25.q b/ql/src/test/queries/clientpositive/auto_join25.q index 2882341..01867b0 100644 --- a/ql/src/test/queries/clientpositive/auto_join25.q +++ b/ql/src/test/queries/clientpositive/auto_join25.q @@ -8,7 +8,7 @@ set hive.auto.convert.join = true; set hive.mapjoin.followby.gby.localtask.max.memory.usage = 0.0001; -- HIVE-18445 set hive.mapjoin.localtask.max.memory.usage = 0.0001; set hive.mapjoin.check.memory.rows = 2; -set hive.auto.convert.join.noconditionaltask = false; +set hive.map.join.conversion.factor=0.0f; -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin CREATE TABLE dest1_n62(key INT, value STRING) STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/auto_join_reordering_values.q b/ql/src/test/queries/clientpositive/auto_join_reordering_values.q index 39b2e1d..a688378 100644 --- a/ql/src/test/queries/clientpositive/auto_join_reordering_values.q +++ b/ql/src/test/queries/clientpositive/auto_join_reordering_values.q @@ -10,7 +10,7 @@ drop table if exists user_small; create table user_small( userid int); insert overwrite table user_small select key from testsrc tablesample (100 rows); -set hive.auto.convert.join.noconditionaltask.size = 200; +set hive.map.join.conversion.factor=0.0000002f; explain extended SELECT `dim_pay_date`.`date` , `deal`.`dealid` diff --git a/ql/src/test/queries/clientpositive/auto_join_stats.q b/ql/src/test/queries/clientpositive/auto_join_stats.q index 8b377bf..790d245 100644 --- a/ql/src/test/queries/clientpositive/auto_join_stats.q +++ b/ql/src/test/queries/clientpositive/auto_join_stats.q @@ -1,7 +1,7 @@ --! qt:dataset:src1 --! qt:dataset:src set hive.auto.convert.join = true; -set hive.auto.convert.join.noconditionaltask.size=2660; +set hive.map.join.conversion.factor=0.0000025f; -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable_n0). -- This query plan should thus not try to combine the mapjoin into a single work. 
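A pattern worth noting in these .q migrations: the new factors appear to back the old byte thresholds out against a roughly 1 GiB test executor heap (for example, 0.0000025f x 2^30 = ~2684 bytes, close to the old 2660 in auto_join_stats.q), while tests that previously "disabled" hash joins with a tiny size now set the factor to 0.0f, which switches conversion off outright per the new config's description. A hypothetical throwaway helper for that translation is sketched below; the 1 GiB figure is an assumption inferred from the values in this patch, not anything it states.

```java
// Hypothetical migration helper: old absolute threshold (bytes) -> conversion factor.
// The 1 GiB executor heap is an assumption inferred from the factors in these tests.
public class ThresholdToFactor {
  private static final long ASSUMED_EXECUTOR_HEAP_BYTES = 1024L * 1024L * 1024L;

  static float toFactor(long oldThresholdBytes) {
    return (float) oldThresholdBytes / ASSUMED_EXECUTOR_HEAP_BYTES;
  }

  public static void main(String[] args) {
    System.out.println(toFactor(2660L));  // ~2.5e-6, matching the 0.0000025f above
    System.out.println(toFactor(10000L)); // ~9.3e-6, rounded to the 0.00001f used elsewhere
  }
}
```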
diff --git a/ql/src/test/queries/clientpositive/auto_join_without_localtask.q b/ql/src/test/queries/clientpositive/auto_join_without_localtask.q index 15697f2..1ae3340 100644 --- a/ql/src/test/queries/clientpositive/auto_join_without_localtask.q +++ b/ql/src/test/queries/clientpositive/auto_join_without_localtask.q @@ -5,7 +5,7 @@ set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org. set hive.auto.convert.join=true; set hive.auto.convert.join.use.nonstaged=true; -set hive.auto.convert.join.noconditionaltask.size=100; +set hive.map.join.conversion.factor=0.0000001f; explain select a.* from src a join src b on a.key=b.key order by key, value limit 40; @@ -17,7 +17,7 @@ select a.* from src a join src b on a.key=b.key join src c on a.value=c.value or select a.* from src a join src b on a.key=b.key join src c on a.value=c.value order by a.key, a.value limit 40; -set hive.auto.convert.join.noconditionaltask.size=100; +set hive.map.join.conversion.factor=0.0000001f; explain select a.* from src a join src b on a.key=b.key join src c on a.value=c.value where a.key>100 order by a.key, a.value limit 40; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q index 1fbe8f7..5097c3f 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q @@ -28,7 +28,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.0f; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small_n1 a JOIN bucket_big_n1 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q index 1b15a74..15b0efc 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q @@ -22,7 +22,7 @@ set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; set hive.auto.convert.sortmerge.join=true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=1; +set hive.map.join.conversion.factor=0.000000001f; -- One of the subqueries contains a union, so it should not be converted to a sort-merge join. 
explain diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q index 76e615f..479c59d 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q @@ -21,7 +21,7 @@ load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO set hive.auto.convert.join=true; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.0f; set hive.auto.convert.sortmerge.join=false; explain extended select count(*) FROM bucket_small_n11 a JOIN bucket_big_n11 b ON a.key = b.key; select count(*) FROM bucket_small_n11 a JOIN bucket_big_n11 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q index d68efea..68cc82b 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q @@ -25,7 +25,7 @@ set hive.auto.convert.sortmerge.join=true; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.0f; CREATE TABLE bucket_medium (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q index bbd7afa..e023033 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q @@ -23,7 +23,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; -- A SMB join followed by a mutli-insert -set hive.auto.convert.join.noconditionaltask.size=200; +set hive.map.join.conversion.factor=0.0000002f; explain from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -42,7 +42,6 @@ INSERT OVERWRITE TABLE dest2_n4 select value1, value2; select * from dest1_n20; select * from dest2_n4; -set hive.auto.convert.join.noconditionaltask=true; set hive.mapjoin.hybridgrace.minwbsize=100; set hive.mapjoin.hybridgrace.minnumpartitions=2; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q index 1e21c92..9e1e8e0 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q @@ -19,7 +19,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.0f; -- Since tbl1_n7 is the bigger table, tbl1_n7 Left Outer Join tbl2_n6 can be performed explain diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q index b3dd8e5..8ee980f 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q @@ -19,7 +19,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; -- disable hash joins -set 
hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.0f; explain select count(*) FROM tbl1_n11 a LEFT OUTER JOIN tbl2_n10 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q index 0a72ddf..de004f4 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q @@ -88,7 +88,7 @@ on ; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask.size=1; +set hive.map.join.conversion.factor=0.0f; set hive.auto.convert.sortmerge.join=true; explain select a.key , a.value , b.value , 'day1' as day, 1 as pri diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q index c6d5318..aeff34b 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q @@ -23,7 +23,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.0f; -- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly explain extended select count(*) FROM bucket_big_n3 a JOIN bucket_small_n3 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q index f41b097..6678a99 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q @@ -23,7 +23,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=100; +set hive.map.join.conversion.factor=0.0f; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small_n9 a JOIN bucket_big_n9 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q index e28e0ab..7086e46 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q @@ -25,7 +25,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=200; +set hive.map.join.conversion.factor=0.0f; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small_n12 a JOIN bucket_big_n12 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q index 9550e21..ca5ef0a 100644 --- 
a/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q @@ -22,7 +22,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ; --disable hash joins -set hive.auto.convert.join.noconditionaltask.size=1; +set hive.map.join.conversion.factor=0.0f; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small_n0 a JOIN bucket_big_n0 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q index 0aeec0e..e4bac86 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q @@ -19,8 +19,7 @@ set hive.auto.convert.sortmerge.join=true; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=200; +set hive.map.join.conversion.factor=0.0000002f; set hive.auto.convert.sortmerge.join.to.mapjoin=false; -- A SMB join is being followed by a regular join on a non-bucketed table on a different key diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q index 8c9dbac..34c2eaa 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q @@ -28,7 +28,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.0f; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small_n6 a JOIN bucket_big_n6 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q index 51647e8..6dd23d1 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q @@ -31,7 +31,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; --disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.0f; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small_n5 a JOIN bucket_big_n5 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q index 5696682..22b2b25 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q +++ 
b/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q @@ -24,7 +24,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join=true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; --disable hash joins -set hive.auto.convert.join.noconditionaltask.size=1; +set hive.map.join.conversion.factor=0.0f; -- The join is being performed as part of sub-query. It should be converted to a sort-merge join explain diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q index 049a4d9..7a1608c 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q @@ -3,8 +3,7 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; CREATE TABLE srcbucket_mapjoin_n14(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; CREATE TABLE tab_part_n9 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; @@ -48,7 +47,7 @@ select a.key, a.value, b.value from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value; -set hive.auto.convert.join.noconditionaltask.size=900; +set hive.map.join.conversion.factor=0.00000084f; set hive.convert.join.bucket.mapjoin.tez = false; explain select count(*) @@ -123,7 +122,7 @@ join -- one side is really bucketed. srcbucket_mapjoin_n14 is not really a bucketed table. -- In this case the sub-query is chosen as the big table. set hive.convert.join.bucket.mapjoin.tez = false; -set hive.auto.convert.join.noconditionaltask.size=1000; +set hive.map.join.conversion.factor=0.00000093f; explain select a.k1, a.v1, b.value from (select sum(substr(srcbucket_mapjoin_n14.value,5)) as v1, key as k1 from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a @@ -158,7 +157,7 @@ from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab_n8 x join tab_n8 -- multi-way join set hive.convert.join.bucket.mapjoin.tez = false; -set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.map.join.conversion.factor=0.000019f; explain select a.key, a.value, b.value from tab_part_n9 a join tab_n8 b on a.key = b.key join tab_n8 c on a.key = c.key; @@ -178,7 +177,7 @@ from (select x.key, x.value from tab_part_n9 x join tab_n8 y on x.key = y.key) a -- in this case sub-query is the small table set hive.convert.join.bucket.mapjoin.tez = false; -set hive.auto.convert.join.noconditionaltask.size=900; +set hive.map.join.conversion.factor=0.00000084f; explain select a.key, a.value, b.value from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a @@ -215,7 +214,7 @@ CREATE TABLE tab1_n4(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS ST insert overwrite table tab1_n4 select key,value from srcbucket_mapjoin_n14; -set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.map.join.conversion.factor=0.000019f; set hive.convert.join.bucket.mapjoin.tez = false; explain select a.key, a.value, b.value @@ -227,7 +226,7 @@ from tab1_n4 a join tab_part_n9 b on a.key = b.key; -- No map joins should be created. 
set hive.convert.join.bucket.mapjoin.tez = false; -set hive.auto.convert.join.noconditionaltask.size=15000; +set hive.map.join.conversion.factor=0.000014f; explain select a.key, b.key from tab_part_n9 a join tab_part_n9 c on a.key = c.key join tab_part_n9 b on a.value = b.value; set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, b.key from tab_part_n9 a join tab_part_n9 c on a.key = c.key join tab_part_n9 b on a.value = b.value; @@ -243,7 +242,7 @@ select a.key, a.value, b.value from tab_n8 a join tab_part_n9 b on a.key = b.key and a.ds = b.ds; -- HIVE-17792 : Enable Bucket Map Join when there are extra keys other than bucketed columns -set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.map.join.conversion.factor=0.000019f; set hive.convert.join.bucket.mapjoin.tez = false; explain select a.key, a.value, b.value from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value; diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q index ec32e9c..ee15c8a 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q @@ -5,8 +5,7 @@ set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=30000; +set hive.map.join.conversion.factor=0.000028f; set hive.llap.memory.oversubscription.max.executors.per.query=3; CREATE TABLE srcbucket_mapjoin_n18(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; @@ -36,7 +35,7 @@ analyze table srcbucket_mapjoin_part_n20 compute statistics for columns; analyze table tab_n10 compute statistics for columns; analyze table tab_part_n11 compute statistics for columns; -set hive.auto.convert.join.noconditionaltask.size=3500; +set hive.map.join.conversion.factor=0.0000033f; set hive.convert.join.bucket.mapjoin.tez = false; explain select a.key, b.key from tab_part_n11 a join tab_part_n11 c on a.key = c.key join tab_part_n11 b on a.value = b.value; set hive.convert.join.bucket.mapjoin.tez = true; @@ -48,7 +47,7 @@ select key,value from srcbucket_mapjoin_n18; analyze table tab1_n5 compute statistics for columns; -- A negative test as src is not bucketed. 
-set hive.auto.convert.join.noconditionaltask.size=12000; +set hive.map.join.conversion.factor=0.000011f; set hive.convert.join.bucket.mapjoin.tez = false; explain select a.key, a.value, b.value @@ -58,7 +57,7 @@ explain select a.key, a.value, b.value from tab1_n5 a join src b on a.key = b.key; -set hive.auto.convert.join.noconditionaltask.size=2500; +set hive.map.join.conversion.factor=0.0000023f; set hive.convert.join.bucket.mapjoin.tez = false; explain select a.key, b.key from (select key from tab_part_n11 where key > 1) a join (select key from tab_part_n11 where key > 2) b on a.key = b.key; @@ -80,7 +79,7 @@ set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, b.key from (select key from tab_part_n11 where key > 1) a right outer join (select key from tab_part_n11 where key > 2) b on a.key = b.key; -set hive.auto.convert.join.noconditionaltask.size=2000; +set hive.map.join.conversion.factor=0.0000019f; set hive.convert.join.bucket.mapjoin.tez = false; explain select a.key, b.key from (select distinct key from tab_n10) a join tab_n10 b on b.key = a.key; set hive.convert.join.bucket.mapjoin.tez = true; @@ -99,7 +98,7 @@ insert overwrite table tab_part1 partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part_n20; analyze table tab_part1 compute statistics for columns; -set hive.auto.convert.join.noconditionaltask.size=12000; +set hive.map.join.conversion.factor=0.000011f; set hive.convert.join.bucket.mapjoin.tez = false; explain select count(*) @@ -129,7 +128,7 @@ insert overwrite table tab_part_ext partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part_n20; analyze table tab_part_ext compute statistics for columns; -set hive.auto.convert.join.noconditionaltask.size=3500; +set hive.map.join.conversion.factor=0.0000033f; set hive.convert.join.bucket.mapjoin.tez = true; set hive.disable.unsafe.external.table.operations=true; set test.comment=Bucket map join should work here; diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q index 359560a..1532108 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez_empty.q @@ -1,8 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; CREATE TABLE tab1_n0(key1 int, value string) CLUSTERED BY (key1) INTO 10 BUCKETS STORED AS TEXTFILE; CREATE TABLE tab2_n0 (key1 int, value string) CLUSTERED BY (key1) INTO 10 BUCKETS STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q index 0b913ca..dccdb4d 100644 --- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q +++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q @@ -14,7 +14,7 @@ set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop set hive.auto.convert.sortmerge.join.to.mapjoin=true; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.000000001f; -- Create two bucketed and sorted tables CREATE TABLE test_table1_n0 (key INT, value STRING) PARTITIONED BY (ds STRING) diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q 
b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q index 2d4907c..71a10b6 100644 --- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q +++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q @@ -30,7 +30,7 @@ INSERT OVERWRITE TABLE test_table2_n3 PARTITION (ds = '1') SELECT key, key+1, va -- Insert data into the bucketed table by selecting from another bucketed table -- This should be a map-only operation, since the sort-order matches -set hive.auto.convert.join.noconditionaltask.size=400; +set hive.map.join.conversion.factor=0.0000004f; EXPLAIN INSERT OVERWRITE TABLE test_table3_n3 PARTITION (ds = '1') SELECT a.key, a.key2, concat(a.value, b.value) diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q index bef48b2..59ed72a 100644 --- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q +++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q @@ -14,7 +14,7 @@ set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop set hive.auto.convert.sortmerge.join.to.mapjoin=true; -- disable hash joins -set hive.auto.convert.join.noconditionaltask.size=10; +set hive.map.join.conversion.factor=0.000000001f; -- Create two bucketed and sorted tables CREATE TABLE test_table1_n20 (key INT, value STRING) PARTITIONED BY (ds STRING) diff --git a/ql/src/test/queries/clientpositive/cbo_rp_cross_product_check_2.q b/ql/src/test/queries/clientpositive/cbo_rp_cross_product_check_2.q index 00c19c7..4a26ddc 100644 --- a/ql/src/test/queries/clientpositive/cbo_rp_cross_product_check_2.q +++ b/ql/src/test/queries/clientpositive/cbo_rp_cross_product_check_2.q @@ -12,8 +12,7 @@ select * from src order by key limit 10; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000000; +set hive.map.join.conversion.factor=0.01f; explain select * from A_n18 join B_n14; diff --git a/ql/src/test/queries/clientpositive/convert_decimal64_to_decimal.q b/ql/src/test/queries/clientpositive/convert_decimal64_to_decimal.q index c76057c..14fab56 100644 --- a/ql/src/test/queries/clientpositive/convert_decimal64_to_decimal.q +++ b/ql/src/test/queries/clientpositive/convert_decimal64_to_decimal.q @@ -2,8 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; -SET hive.auto.convert.join.noconditionaltask=true; -SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.map.join.conversion.factor=0.93f; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/correlationoptimizer5.q b/ql/src/test/queries/clientpositive/correlationoptimizer5.q index d75a48f..4b84e7c 100644 --- a/ql/src/test/queries/clientpositive/correlationoptimizer5.q +++ b/ql/src/test/queries/clientpositive/correlationoptimizer5.q @@ -54,8 +54,7 @@ JOIN ON b.key = d.key; set hive.optimize.correlation=true; -set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask.size=10000000000; +set hive.map.join.conversion.factor=0.99f; -- Enable hive.auto.convert.join. 
diff --git a/ql/src/test/queries/clientpositive/correlationoptimizer5.q b/ql/src/test/queries/clientpositive/correlationoptimizer5.q
index d75a48f..4b84e7c 100644
--- a/ql/src/test/queries/clientpositive/correlationoptimizer5.q
+++ b/ql/src/test/queries/clientpositive/correlationoptimizer5.q
@@ -54,8 +54,7 @@ JOIN
 ON b.key = d.key;
 set hive.optimize.correlation=true;
-set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=10000000000;
+set hive.map.join.conversion.factor=0.99f;
 -- Enable hive.auto.convert.join.
 EXPLAIN
 INSERT OVERWRITE TABLE dest_co3
diff --git a/ql/src/test/queries/clientpositive/correlationoptimizer7.q b/ql/src/test/queries/clientpositive/correlationoptimizer7.q
index 0d57d7e..f90cf07 100644
--- a/ql/src/test/queries/clientpositive/correlationoptimizer7.q
+++ b/ql/src/test/queries/clientpositive/correlationoptimizer7.q
@@ -39,8 +39,7 @@ JOIN src1 yy
 ON xx.key=yy.key;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000000000;
+set hive.map.join.conversion.factor=0.99f;
 set hive.optimize.correlation=false;
 -- Without correlation optimizer, we will have 3 MR jobs.
diff --git a/ql/src/test/queries/clientpositive/cross_join.q b/ql/src/test/queries/clientpositive/cross_join.q
index 041f0c3..b7a5dfa 100644
--- a/ql/src/test/queries/clientpositive/cross_join.q
+++ b/ql/src/test/queries/clientpositive/cross_join.q
@@ -8,8 +8,7 @@ explain select src.key from src cross join src src2;
 -- appending condition is allowed
 explain select src.key from src cross join src src2 on src.key=src2.key;
-set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.map.join.conversion.factor=0.01f;
 set hive.mapjoin.hybridgrace.hashtable=true;
 explain select src.key from src join src src2;
diff --git a/ql/src/test/queries/clientpositive/cross_product_check_1.q b/ql/src/test/queries/clientpositive/cross_product_check_1.q
index 23c6638..4762bea 100644
--- a/ql/src/test/queries/clientpositive/cross_product_check_1.q
+++ b/ql/src/test/queries/clientpositive/cross_product_check_1.q
@@ -10,7 +10,7 @@ create table B_n6 as
 select * from src
 limit 10;
-set hive.auto.convert.join.noconditionaltask.size=100;
+set hive.map.join.conversion.factor=0.0000001f;
 explain select * from A_n8 join B_n6;
diff --git a/ql/src/test/queries/clientpositive/cross_product_check_2.q b/ql/src/test/queries/clientpositive/cross_product_check_2.q
index 329e878..c59129f 100644
--- a/ql/src/test/queries/clientpositive/cross_product_check_2.q
+++ b/ql/src/test/queries/clientpositive/cross_product_check_2.q
@@ -11,8 +11,7 @@ select * from src order by key limit 10;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.map.join.conversion.factor=0.01f;
 explain select * from A_n2 join B_n2;
diff --git a/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q
index de339f8..8323f84 100644
--- a/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q
@@ -125,8 +125,7 @@ EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select
 select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask = true;
-set hive.auto.convert.join.noconditionaltask.size = 10000000;
+set hive.map.join.conversion.factor=0.01f;
 -- single column, single key
 EXPLAIN select count(*) from srcpart join srcpart_date_n2 on (srcpart.ds = srcpart_date_n2.ds) where srcpart_date_n2.`date` = '2008-04-08';
diff --git a/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q b/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q
index f3d94d5..2a326bc 100644
--- a/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q
+++ b/ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q
@@ -7,8 +7,7 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask = true;
-set hive.auto.convert.join.noconditionaltask.size = 10000000;
+set hive.map.join.conversion.factor=0.01f;
 create table dim_shops (id int, label string) row format delimited fields terminated by ',' stored as textfile;
 load data local inpath '../../data/files/dim_shops.txt' into table dim_shops;
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q
index 32c2854..c2aa714 100644
--- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q
@@ -81,8 +81,7 @@ set hive.tez.dynamic.semijoin.reduction=false;
 -- With Mapjoins, there shouldn't be any semijoin parallel to mapjoin.
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=100000000000;
+set hive.map.join.conversion.factor=0.99f;
 EXPLAIN select count(*) from srcpart_date_n7 join srcpart_small_n3 on (srcpart_date_n7.key = srcpart_small_n3.key1);
 select count(*) from srcpart_date_n7 join srcpart_small_n3 on (srcpart_date_n7.key = srcpart_small_n3.key1);
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
index 20b657e..c7a441f 100644
--- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
@@ -68,7 +68,7 @@ select key,value from srcbucket_mapjoin_n20;
 set hive.convert.join.bucket.mapjoin.tez = true;
 set hive.auto.convert.sortmerge.join = true;
-set hive.auto.convert.join.noconditionaltask.size=0;
+set hive.map.join.conversion.factor=0.0f;
 set hive.mapjoin.hybridgrace.minwbsize=125;
 set hive.mapjoin.hybridgrace.minnumpartitions=4;
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_user_level.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_user_level.q
index 5e3a1b8..6be2752 100644
--- a/ql/src/test/queries/clientpositive/dynamic_semijoin_user_level.q
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_user_level.q
@@ -80,8 +80,7 @@ set hive.tez.dynamic.semijoin.reduction=false;
 -- With Mapjoins.
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=100000000000;
+set hive.map.join.conversion.factor=0.99f;
 EXPLAIN select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
 select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1);
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_1.q b/ql/src/test/queries/clientpositive/explainanalyze_1.q
index 9043826..b32e0a7 100644
--- a/ql/src/test/queries/clientpositive/explainanalyze_1.q
+++ b/ql/src/test/queries/clientpositive/explainanalyze_1.q
@@ -33,8 +33,7 @@ explain analyze select * from src a union all select * from src b;
 select count(*) from (select * from src a union all select * from src b)subq;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 EXPLAIN analyze
 SELECT x.key, y.value
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_2.q b/ql/src/test/queries/clientpositive/explainanalyze_2.q
index 7e48a8a..95d6563 100644
--- a/ql/src/test/queries/clientpositive/explainanalyze_2.q
+++ b/ql/src/test/queries/clientpositive/explainanalyze_2.q
@@ -31,8 +31,7 @@ JOIN (select key, value from src1 union select key, value from src union select
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
 CREATE TABLE srcbucket_mapjoin_n11(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
@@ -64,7 +63,7 @@ select key,value from srcbucket_mapjoin_n11;
 set hive.convert.join.bucket.mapjoin.tez = false;
 set hive.auto.convert.sortmerge.join = true;
-set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.map.join.conversion.factor=0.0000005f;
 explain analyze
 select s1.key as key, s1.value as value from tab_n6 s1 join tab_n6 s3 on s1.key=s3.key;
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_3.q b/ql/src/test/queries/clientpositive/explainanalyze_3.q
index 1f31218..3bcc7ab 100644
--- a/ql/src/test/queries/clientpositive/explainanalyze_3.q
+++ b/ql/src/test/queries/clientpositive/explainanalyze_3.q
@@ -136,8 +136,7 @@ explain analyze insert overwrite table orc_merge5_n1 select userid,string1,subty
 drop table orc_merge5_n1;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 CREATE TABLE srcbucket_mapjoin_n4(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
 CREATE TABLE tab_part_n3 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_4.q b/ql/src/test/queries/clientpositive/explainanalyze_4.q
index 6daba22..1197484 100644
--- a/ql/src/test/queries/clientpositive/explainanalyze_4.q
+++ b/ql/src/test/queries/clientpositive/explainanalyze_4.q
@@ -56,7 +56,7 @@ order by c1;
 set hive.auto.convert.join=true;
 set hive.optimize.dynamic.partition.hashjoin=true;
-set hive.auto.convert.join.noconditionaltask.size=200000;
+set hive.map.join.conversion.factor=0.00018f;
 set hive.stats.fetch.column.stats=false;
 set hive.exec.reducers.bytes.per.reducer=200000;
diff --git a/ql/src/test/queries/clientpositive/explainuser_1.q b/ql/src/test/queries/clientpositive/explainuser_1.q
index 23bdb79..c455616 100644
--- a/ql/src/test/queries/clientpositive/explainuser_1.q
+++ b/ql/src/test/queries/clientpositive/explainuser_1.q
@@ -390,8 +390,7 @@ drop table sales_n0;
 drop table things_n0;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 set hive.stats.fetch.column.stats=false;
 set hive.mapjoin.optimized.hashtable=false;
diff --git a/ql/src/test/queries/clientpositive/explainuser_2.q b/ql/src/test/queries/clientpositive/explainuser_2.q
index bc795cf..b45a08d 100644
--- a/ql/src/test/queries/clientpositive/explainuser_2.q
+++ b/ql/src/test/queries/clientpositive/explainuser_2.q
@@ -103,8 +103,7 @@ JOIN (select key, value from src1 union select key, value from src union select
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=30000;
+set hive.map.join.conversion.factor=0.000028f;
 set hive.stats.fetch.column.stats=false;
@@ -165,8 +164,7 @@ JOIN (select key, value from src1 union select key, value from src union select
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=20000;
+set hive.map.join.conversion.factor=0.000018f;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
 CREATE TABLE srcbucket_mapjoin_n22(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
@@ -198,7 +196,7 @@ select key,value from srcbucket_mapjoin_n22;
 set hive.convert.join.bucket.mapjoin.tez = false;
 set hive.auto.convert.sortmerge.join = true;
-set hive.auto.convert.join.noconditionaltask.size=2000;
+set hive.map.join.conversion.factor=0.0000018f;
 explain
 select s1.key as key, s1.value as value from tab_n15 s1 join tab_n15 s3 on s1.key=s3.key;
diff --git a/ql/src/test/queries/clientpositive/explainuser_3.q b/ql/src/test/queries/clientpositive/explainuser_3.q
index 0c110ae..81dfd5c 100644
--- a/ql/src/test/queries/clientpositive/explainuser_3.q
+++ b/ql/src/test/queries/clientpositive/explainuser_3.q
@@ -134,8 +134,7 @@ explain insert overwrite table orc_merge5_n0 select userid,string1,subtype,decim
 drop table orc_merge5_n0;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 CREATE TABLE srcbucket_mapjoin_n3(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
 CREATE TABLE tab_part_n2 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
diff --git a/ql/src/test/queries/clientpositive/explainuser_4.q b/ql/src/test/queries/clientpositive/explainuser_4.q
index 2125d09..e504fb3 100644
--- a/ql/src/test/queries/clientpositive/explainuser_4.q
+++ b/ql/src/test/queries/clientpositive/explainuser_4.q
@@ -55,7 +55,7 @@ order by c1, a.csmallint;
 set hive.auto.convert.join=true;
 set hive.optimize.dynamic.partition.hashjoin=true;
-set hive.auto.convert.join.noconditionaltask.size=200000;
+set hive.map.join.conversion.factor=0.00018f;
 set hive.stats.fetch.column.stats=false;
 set hive.exec.reducers.bytes.per.reducer=200000;
diff --git a/ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q b/ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q
index 32b2e04..13a8dc5 100644
--- a/ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q
+++ b/ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q
@@ -5,7 +5,6 @@ set hive.vectorized.execution.mapjoin.native.enabled=false;
 set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false;
 set hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
 set hive.stats.fetch.column.stats=false;
 ------------------------------------------------------------------------------------------
@@ -24,7 +23,7 @@ set hive.mapjoin.hybridgrace.hashtable=false;
 -- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage
 -- NOTE: of DYNAMIC PARTITION HASH JOIN instead.
-set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.map.join.conversion.factor=0.0000005f;
 set hive.exec.reducers.bytes.per.reducer=500;
 ------------------------------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
index 30cf8c4..4a68900 100644
--- a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
+++ b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
@@ -15,7 +15,7 @@ set tez.cartesian-product.max-parallelism=1;
 SELECT 1;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=1300000;
+set hive.map.join.conversion.factor=0.0012f;
 set hive.mapjoin.optimized.hashtable.wbsize=880000;
 set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
@@ -60,7 +60,7 @@ select count(*) from
 where c.cint < 2000000000) t1
 ;
-set hive.auto.convert.join.noconditionaltask.size=3000000;
+set hive.map.join.conversion.factor=0.0028f;
 set hive.mapjoin.optimized.hashtable.wbsize=100000;
 set hive.mapjoin.hybridgrace.hashtable=false;
@@ -147,7 +147,7 @@ insert overwrite table parttbl partition(dt='2000-01-01')
 insert overwrite table parttbl partition(dt='2000-01-02')
 select * from src1;
-set hive.auto.convert.join.noconditionaltask.size=30000000;
+set hive.map.join.conversion.factor=0.028f;
 set hive.mapjoin.optimized.hashtable.wbsize=10000000;
 set hive.mapjoin.hybridgrace.hashtable=false;
@@ -187,7 +187,7 @@ select count(*) from
 ;
-set hive.auto.convert.join.noconditionaltask.size=20000;
+set hive.map.join.conversion.factor=0.000019f;
 set hive.mapjoin.optimized.hashtable.wbsize=10000;
 set hive.mapjoin.hybridgrace.hashtable=false;
@@ -238,8 +238,7 @@ CREATE TABLE decimal_mapjoin STORED AS ORC AS
 FROM alltypesorc;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=50000000;
+set hive.map.join.conversion.factor=0.046f;
 set hive.mapjoin.optimized.hashtable.wbsize=10000;
 SET hive.vectorized.execution.enabled=true;
 set hive.mapjoin.hybridgrace.hashtable=false;
diff --git a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
index d989ca7..4f6137b 100644
--- a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
+++ b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
@@ -8,8 +8,7 @@ set hive.explain.user=false;
 SELECT 1;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.map.join.conversion.factor=0.0093f;
 set hive.cbo.enable=false;
diff --git a/ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q b/ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q
index b33011a..2635200 100644
--- a/ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q
+++ b/ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q
@@ -28,7 +28,7 @@ DESCRIBE FORMATTED test_table_n11 PARTITION (part = '1');
 set hive.mapjoin.check.memory.rows=1;
 set hive.mapjoin.localtask.max.memory.usage = 0.0001;
-set hive.auto.convert.join.noconditionaltask = false;
+set hive.map.join.conversion.factor=0.0f;
 -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin.
 -- Tests a join which is not converted to a map join, the output should be bucketed and sorted.
diff --git a/ql/src/test/queries/clientpositive/join28.q b/ql/src/test/queries/clientpositive/join28.q
index 868ce9d..9b6ac97 100644
--- a/ql/src/test/queries/clientpositive/join28.q
+++ b/ql/src/test/queries/clientpositive/join28.q
@@ -7,8 +7,7 @@ set hive.mapred.mode=nonstrict;
 CREATE TABLE dest_j1_n11(key STRING, value STRING) STORED AS TEXTFILE;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/join29.q b/ql/src/test/queries/clientpositive/join29.q
index 20aadbb..c90ac9f 100644
--- a/ql/src/test/queries/clientpositive/join29.q
+++ b/ql/src/test/queries/clientpositive/join29.q
@@ -6,8 +6,7 @@ set hive.mapred.mode=nonstrict;
 CREATE TABLE dest_j1_n6(key STRING, cnt1 INT, cnt2 INT);
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/join31.q b/ql/src/test/queries/clientpositive/join31.q
index 2083407..6b3c9ba 100644
--- a/ql/src/test/queries/clientpositive/join31.q
+++ b/ql/src/test/queries/clientpositive/join31.q
@@ -7,8 +7,7 @@ set hive.optimize.semijoin.conversion=true;
 CREATE TABLE dest_j1_n22(key STRING, cnt INT);
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/join32.q b/ql/src/test/queries/clientpositive/join32.q
index c55e730..01f7c39 100644
--- a/ql/src/test/queries/clientpositive/join32.q
+++ b/ql/src/test/queries/clientpositive/join32.q
@@ -7,8 +7,7 @@ set hive.mapred.mode=nonstrict;
 CREATE TABLE dest_j1_n12(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/join32_lessSize.q b/ql/src/test/queries/clientpositive/join32_lessSize.q
index 5c7be26..0951fa9 100644
--- a/ql/src/test/queries/clientpositive/join32_lessSize.q
+++ b/ql/src/test/queries/clientpositive/join32_lessSize.q
@@ -9,8 +9,7 @@ CREATE TABLE dest_j1_n21(key STRING, value STRING, val2 STRING) STORED AS TEXTFI
 CREATE TABLE dest_j2_n1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=4000;
+set hive.map.join.conversion.factor=0.0000037f;
 set hive.llap.memory.oversubscription.max.executors.per.query=3;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/join33.q b/ql/src/test/queries/clientpositive/join33.q
index 6ddf0eb..39c4ee1 100644
--- a/ql/src/test/queries/clientpositive/join33.q
+++ b/ql/src/test/queries/clientpositive/join33.q
@@ -8,8 +8,7 @@ set hive.mapred.mode=nonstrict;
 CREATE TABLE dest_j1_n7(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/join34.q b/ql/src/test/queries/clientpositive/join34.q
index e0234c6..ccd838c 100644
--- a/ql/src/test/queries/clientpositive/join34.q
+++ b/ql/src/test/queries/clientpositive/join34.q
@@ -6,8 +6,7 @@ set hive.mapred.mode=nonstrict;
 CREATE TABLE dest_j1_n1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/join35.q b/ql/src/test/queries/clientpositive/join35.q
index 29b6b00..1539c0f 100644
--- a/ql/src/test/queries/clientpositive/join35.q
+++ b/ql/src/test/queries/clientpositive/join35.q
@@ -6,8 +6,7 @@ set hive.mapred.mode=nonstrict;
 CREATE TABLE dest_j1_n24(key STRING, value STRING, val2 INT) STORED AS TEXTFILE;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/join41.q b/ql/src/test/queries/clientpositive/join41.q
index 33dfcb0..4409d1b 100644
--- a/ql/src/test/queries/clientpositive/join41.q
+++ b/ql/src/test/queries/clientpositive/join41.q
@@ -4,12 +4,12 @@ set hive.mapred.mode=nonstrict;
 create table s1 as select * from src where key = 0;
-set hive.auto.convert.join.noconditionaltask=false;
+set hive.map.join.conversion.factor=0.0f;
 EXPLAIN SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10);
 SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10);
-set hive.auto.convert.join.noconditionaltask=true;
+set hive.map.join.conversion.factor=0.009f;
 -- Make sure the big table is chosen correctly as part of HIVE-4146
 EXPLAIN
diff --git a/ql/src/test/queries/clientpositive/join_grp_diff_keys.q b/ql/src/test/queries/clientpositive/join_grp_diff_keys.q
index fb110b4..7901388 100644
--- a/ql/src/test/queries/clientpositive/join_grp_diff_keys.q
+++ b/ql/src/test/queries/clientpositive/join_grp_diff_keys.q
@@ -4,7 +4,7 @@ create table bar (id int, line_id int, orders string);
 create table foo (id int, line_id int, orders string);
 create table forecast (id int, line_id int, orders string);
-set hive.auto.convert.join.noconditionaltask=false;
+set hive.map.join.conversion.factor=0.0f;
 explain SELECT foo.id, count(*) as factor from
@@ -19,4 +19,4 @@ drop table bar;
 drop table foo;
 drop table forecast;
-reset hive.auto.convert.join.noconditionaltask;
+reset hive.map.join.conversion.factor;
diff --git a/ql/src/test/queries/clientpositive/join_star.q b/ql/src/test/queries/clientpositive/join_star.q
index 70cc8f7..20e53463 100644
--- a/ql/src/test/queries/clientpositive/join_star.q
+++ b/ql/src/test/queries/clientpositive/join_star.q
@@ -18,8 +18,7 @@ LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim6;
 LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim7;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=5000;
+set hive.map.join.conversion.factor=0.0000046f;
 explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1;
 select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1;
diff --git a/ql/src/test/queries/clientpositive/llap_smb.q b/ql/src/test/queries/clientpositive/llap_smb.q
index edec301..fa8f47d 100644
--- a/ql/src/test/queries/clientpositive/llap_smb.q
+++ b/ql/src/test/queries/clientpositive/llap_smb.q
@@ -43,8 +43,7 @@ set hive.enforce.sortmergebucketmapjoin=false;
 set hive.optimize.bucketmapjoin=true;
 set hive.optimize.bucketmapjoin.sortedmerge=true;
 set hive.auto.convert.sortmerge.join=true;
-set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=10;
+set hive.map.join.conversion.factor=0.00000001f;
 explain
 select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q;
diff --git a/ql/src/test/queries/clientpositive/llap_smb_ptf.q b/ql/src/test/queries/clientpositive/llap_smb_ptf.q
index 037b97d..65ff95c 100644
--- a/ql/src/test/queries/clientpositive/llap_smb_ptf.q
+++ b/ql/src/test/queries/clientpositive/llap_smb_ptf.q
@@ -168,7 +168,7 @@ set hive.enforce.sortmergebucketmapjoin=false;
 set hive.optimize.bucketmapjoin=true;
 set hive.optimize.bucketmapjoin.sortedmerge=true;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=1;
+set hive.map.join.conversion.factor=0.000000001f;
 set hive.merge.nway.joins=false;
 set hive.auto.convert.sortmerge.join=true;
diff --git a/ql/src/test/queries/clientpositive/llap_vector_nohybridgrace.q b/ql/src/test/queries/clientpositive/llap_vector_nohybridgrace.q
index d637d36..c2cd7f4 100644
--- a/ql/src/test/queries/clientpositive/llap_vector_nohybridgrace.q
+++ b/ql/src/test/queries/clientpositive/llap_vector_nohybridgrace.q
@@ -4,7 +4,7 @@ set hive.vectorized.execution.enabled=true;
 set hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=1300000;
+set hive.map.join.conversion.factor=0.0012f;
 set hive.mapjoin.optimized.hashtable.wbsize=880000;
 set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q
index 970a752..3e0ddd2 100644
--- a/ql/src/test/queries/clientpositive/load_data_using_job.q
+++ b/ql/src/test/queries/clientpositive/load_data_using_job.q
@@ -5,8 +5,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.auto.convert.join=true;
 set hive.join.emit.interval=2;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
 -- Single partition
@@ -107,4 +106,4 @@ CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED B
 explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
 load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
-select * from orc_test_txn;
\ No newline at end of file
+select * from orc_test_txn;
diff --git a/ql/src/test/queries/clientpositive/mapjoin_decimal.q b/ql/src/test/queries/clientpositive/mapjoin_decimal.q
index c84267a..b439bd5 100644
--- a/ql/src/test/queries/clientpositive/mapjoin_decimal.q
+++ b/ql/src/test/queries/clientpositive/mapjoin_decimal.q
@@ -2,8 +2,7 @@ SET hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.map.join.conversion.factor=0.0093f;
 -- SORT_QUERY_RESULTS
diff --git a/ql/src/test/queries/clientpositive/mapjoin_hint.q b/ql/src/test/queries/clientpositive/mapjoin_hint.q
index 7189f07..a153fd6 100644
--- a/ql/src/test/queries/clientpositive/mapjoin_hint.q
+++ b/ql/src/test/queries/clientpositive/mapjoin_hint.q
@@ -36,8 +36,7 @@ analyze table srcpart_date_n5 compute statistics for columns;
 analyze table srcpart_small_n1 compute statistics for columns;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=100000000000;
+set hive.map.join.conversion.factor=0.99f;
 --HIVE-17475
 EXPLAIN select /*+ mapjoin(None)*/ count(*) from srcpart_date_n5 join srcpart_small_n1 on (srcpart_date_n5.key = srcpart_small_n1.key1);
diff --git a/ql/src/test/queries/clientpositive/mapjoin_hook.q b/ql/src/test/queries/clientpositive/mapjoin_hook.q
index c929356..a7f8f14 100644
--- a/ql/src/test/queries/clientpositive/mapjoin_hook.q
+++ b/ql/src/test/queries/clientpositive/mapjoin_hook.q
@@ -19,7 +19,7 @@ INSERT OVERWRITE TABLE dest1_n171 SELECT src1.key, src3.value;
 set hive.mapjoin.localtask.max.memory.usage = 0.0001;
 set hive.mapjoin.check.memory.rows = 2;
-set hive.auto.convert.join.noconditionaltask = false;
+set hive.map.join.conversion.factor=0.0f;
 FROM srcpart src1
 JOIN src src2 ON (src1.key = src2.key)
diff --git a/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q b/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
index de05238..955f763 100644
--- a/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
+++ b/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
@@ -4,8 +4,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=30000;
+set hive.map.join.conversion.factor=0.000028f;
 set hive.metastore.aggregate.stats.cache.enabled=false;
 set hive.stats.fetch.column.stats=false;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/mapjoin_subquery.q b/ql/src/test/queries/clientpositive/mapjoin_subquery.q
index 79fe197..4275f47 100644
--- a/ql/src/test/queries/clientpositive/mapjoin_subquery.q
+++ b/ql/src/test/queries/clientpositive/mapjoin_subquery.q
@@ -3,8 +3,7 @@
 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- SORT_QUERY_RESULTS
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/mapjoin_subquery2.q b/ql/src/test/queries/clientpositive/mapjoin_subquery2.q
index 95051d4..61d6c40 100644
--- a/ql/src/test/queries/clientpositive/mapjoin_subquery2.q
+++ b/ql/src/test/queries/clientpositive/mapjoin_subquery2.q
@@ -17,8 +17,7 @@ load data local inpath '../../data/files/y.txt' INTO TABLE y;
 load data local inpath '../../data/files/z.txt' INTO TABLE z;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/mrr.q b/ql/src/test/queries/clientpositive/mrr.q
index 0f2dd84..032be43 100644
--- a/ql/src/test/queries/clientpositive/mrr.q
+++ b/ql/src/test/queries/clientpositive/mrr.q
@@ -1,7 +1,7 @@
 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-set hive.auto.convert.join.noconditionaltask.size=60000000;
+set hive.map.join.conversion.factor=0.056f;
 set hive.log.trace.id=mrrTest;
 -- simple query with multiple reduce stages
diff --git a/ql/src/test/queries/clientpositive/multiMapJoin1.q b/ql/src/test/queries/clientpositive/multiMapJoin1.q
index a65ed72..1ccc500 100644
--- a/ql/src/test/queries/clientpositive/multiMapJoin1.q
+++ b/ql/src/test/queries/clientpositive/multiMapJoin1.q
@@ -59,8 +59,7 @@ select count(*) FROM
 JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value);
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 -- Now run a query with two-way join, which should be converted into a
 -- map-join followed by groupby - two MR jobs overall
@@ -130,7 +129,7 @@ select * from
 select key as key1, key as key2, value from src
 ) subq;
-set hive.auto.convert.join.noconditionaltask=false;
+set hive.map.join.conversion.factor=0.0f;
 -- First disable noconditionaltask
 EXPLAIN
 SELECT SUM(HASH(join3.key1)),
@@ -197,9 +196,8 @@ FROM (SELECT join2.key1 as key1,
 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
--- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size
+set hive.map.join.conversion.factor=0.00001f;
+-- Enable noconditionaltask and set the size of hive.map.join.conversion.factor
 -- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4).
 -- We will use a single MR job to evaluate this query.
 EXPLAIN
@@ -267,8 +265,8 @@ FROM (SELECT join2.key1 as key1,
 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
-set hive.auto.convert.join.noconditionaltask.size=200;
--- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size
+set hive.map.join.conversion.factor=0.00000019f;
+-- Enable noconditionaltask and set the size of hive.map.join.conversion.factor
 -- to 200, which is large enough to fit two small tables. We will have two jobs to evaluate this
 -- query. The first job is a Map-only job to evaluate join1 and join2.
 -- The second job will evaluate the rest of this query.
@@ -337,8 +335,8 @@ FROM (SELECT join2.key1 as key1,
 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key);
-set hive.auto.convert.join.noconditionaltask.size=0;
--- Enable noconditionaltask and but set the size of hive.auto.convert.join.noconditionaltask.size
+set hive.map.join.conversion.factor=0.0f;
+-- Enable noconditionaltask but set the size of hive.map.join.conversion.factor
 -- to 0. The plan will be the same as the one with a disabled nonconditionaltask.
 EXPLAIN
 SELECT SUM(HASH(join3.key1)),
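The multiMapJoin1.q hunks above also show the idiom this patch uses to switch map join conversion off: wherever a test used to set hive.auto.convert.join.noconditionaltask=false, it now sets the factor to 0.0f, and the file's own comment notes that a zero value yields the same plan as the old disabled boolean. A minimal sketch of the before-and-after session setup (big_t and small_t are illustrative table names, not from the patch):

-- Before: set hive.auto.convert.join.noconditionaltask=false;
-- After: a zero factor leaves no memory budget, so nothing qualifies for a map join.
set hive.map.join.conversion.factor=0.0f;
explain select count(*) from big_t b join small_t s on b.key = s.key;  -- expect a shuffle join
set hive.map.join.conversion.factor=0.01f;
explain select count(*) from big_t b join small_t s on b.key = s.key;  -- small_t may now be map-joined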
diff --git a/ql/src/test/queries/clientpositive/multiMapJoin2.q b/ql/src/test/queries/clientpositive/multiMapJoin2.q
index 166cb09..4d2087e 100644
--- a/ql/src/test/queries/clientpositive/multiMapJoin2.q
+++ b/ql/src/test/queries/clientpositive/multiMapJoin2.q
@@ -3,8 +3,7 @@ set hive.mapred.mode=nonstrict;
 set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.apache.hadoop.hive.ql.hooks.PrintCompletedTasksHook;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=6000;
+set hive.map.join.conversion.factor=0.0000056f;
 set hive.optimize.semijoin.conversion=true;
 -- we will generate one MR job.
@@ -21,10 +20,10 @@ FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
 SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
 ORDER BY tmp.key;
-set hive.auto.convert.join.noconditionaltask.size=400;
+set hive.map.join.conversion.factor=0.00000037f;
 -- Check if the total size of local tables will be
 -- larger than the limit that
--- we set through hive.auto.convert.join.noconditionaltask.size (right now, it is
+-- we set through hive.map.join.conversion.factor (right now, it is
 -- 400 bytes). If so, do not merge.
 -- For this query, we will merge the MapJoin of x2 and y2 into the MR job
 -- for UNION ALL and ORDER BY. But, the MapJoin of x1 and y2 will not be merged
@@ -42,7 +41,7 @@ FROM (SELECT x1.key AS key FROM src x1 JOIN src1 y1 ON (x1.key = y1.key)
 SELECT x2.key AS key FROM src x2 JOIN src1 y2 ON (x2.key = y2.key)) tmp
 ORDER BY tmp.key;
-set hive.auto.convert.join.noconditionaltask.size=6000;
+set hive.map.join.conversion.factor=0.0000056f;
 -- We will use two jobs.
 -- We will generate one MR job for GROUP BY
 -- on x1, one MR job for both the MapJoin of x2 and y2, the UNION ALL, and the
@@ -192,7 +191,7 @@ FROM part_table_n0 x JOIN src1 y ON (x.key = y.key);
 SELECT count(*) FROM part_table_n0 x JOIN src1 y ON (x.key = y.key);
-set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.map.join.conversion.factor=0.01f;
 set hive.optimize.correlation=false;
 -- HIVE-5891 Alias conflict when merging multiple mapjoin tasks into their common
 -- child mapred task
diff --git a/ql/src/test/queries/clientpositive/murmur_hash_migration.q b/ql/src/test/queries/clientpositive/murmur_hash_migration.q
index 2b8da9f..8bc4f15 100644
--- a/ql/src/test/queries/clientpositive/murmur_hash_migration.q
+++ b/ql/src/test/queries/clientpositive/murmur_hash_migration.q
@@ -5,8 +5,7 @@ set hive.strict.checks.bucketing=false;
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=30000;
+set hive.map.join.conversion.factor=0.000028f;
 CREATE TABLE srcbucket_mapjoin_n18_stage(key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1');
 CREATE TABLE srcbucket_mapjoin_part_n20_stage (key int, value string) partitioned by (ds string) STORED AS TEXTFILE TBLPROPERTIES("bucketing_version" = '1');
diff --git a/ql/src/test/queries/clientpositive/quotedid_smb.q b/ql/src/test/queries/clientpositive/quotedid_smb.q
index acc7626..d30e2b0 100644
--- a/ql/src/test/queries/clientpositive/quotedid_smb.q
+++ b/ql/src/test/queries/clientpositive/quotedid_smb.q
@@ -29,7 +29,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true;
 set hive.auto.convert.sortmerge.join.to.mapjoin=false;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
 -- disable hash joins
-set hive.auto.convert.join.noconditionaltask.size=1000;
+set hive.map.join.conversion.factor=0.00000093f;
 explain
 select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
 from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
@@ -38,4 +38,4 @@ where a.`x+1` < '11'
 select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
 from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
 where a.`x+1` < '11'
-;
\ No newline at end of file
+;
diff --git a/ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q b/ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q
index f88fd50..1dc9c82 100644
--- a/ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q
+++ b/ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q
@@ -2,7 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.auto.convert.join=false;
-set hive.auto.convert.join.noconditionaltask=false;
+set hive.map.join.conversion.factor=0.0f;
 set hive.convert.join.bucket.mapjoin.tez=false;
 set hive.optimize.dynamic.partition.hashjoin=false;
 set hive.limit.pushdown.memory.usage=0.3f;
diff --git a/ql/src/test/queries/clientpositive/reopt_semijoin.q b/ql/src/test/queries/clientpositive/reopt_semijoin.q
index 75db410..5cfa0ea 100644
--- a/ql/src/test/queries/clientpositive/reopt_semijoin.q
+++ b/ql/src/test/queries/clientpositive/reopt_semijoin.q
@@ -42,7 +42,7 @@ alter table x1_date_dim update statistics set(
 'rawDataSize'='81449');
-set hive.auto.convert.join.noconditionaltask.size=1;
+set hive.map.join.conversion.factor=0.00000000093f;
 set hive.tez.dynamic.partition.pruning=true;
 set hive.tez.dynamic.semijoin.reduction=true;
 set hive.optimize.index.filter=true;
diff --git a/ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_spark.q b/ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_spark.q
index ac93eed..135cb56 100644
--- a/ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_spark.q
+++ b/ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_spark.q
@@ -5,9 +5,7 @@ set hive.mapred.mode=nonstrict;
 set hive.optimize.skewjoin = true;
 set hive.skewjoin.key = 4;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=50;
-
+set hive.map.join.conversion.factor=0.000000047f;
 -- This is mainly intended for spark, to test runtime skew join together with map join
 CREATE TABLE T1_n94(key STRING, val STRING) STORED AS TEXTFILE;
diff --git a/ql/src/test/queries/clientpositive/runtime_stats_merge.q b/ql/src/test/queries/clientpositive/runtime_stats_merge.q
index e694101..ed2213a 100644
--- a/ql/src/test/queries/clientpositive/runtime_stats_merge.q
+++ b/ql/src/test/queries/clientpositive/runtime_stats_merge.q
@@ -7,8 +7,7 @@ set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=88888888;
+set hive.map.join.conversion.factor=0.083f;
 -- set hive.auto.convert.sortmerge.join=true;
 -- set hive.auto.convert.sortmerge.join.to.mapjoin=true;
diff --git a/ql/src/test/queries/clientpositive/smb_cache.q b/ql/src/test/queries/clientpositive/smb_cache.q
index d87ba6c..415e90f 100644
--- a/ql/src/test/queries/clientpositive/smb_cache.q
+++ b/ql/src/test/queries/clientpositive/smb_cache.q
@@ -104,7 +104,7 @@ t1.userid,
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=5;
+set hive.map.join.conversion.factor=0.0000000047f;
 set hive.auto.convert.sortmerge.join=true;
 set hive.convert.join.bucket.mapjoin.tez = true;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_25.q b/ql/src/test/queries/clientpositive/smb_mapjoin_25.q
index 56aa331..e298125 100644
--- a/ql/src/test/queries/clientpositive/smb_mapjoin_25.q
+++ b/ql/src/test/queries/clientpositive/smb_mapjoin_25.q
@@ -26,8 +26,7 @@ set hive.optimize.bucketmapjoin.sortedmerge=true;
 set hive.mapred.reduce.tasks.speculative.execution=false;
 set hive.auto.convert.join=true;
 set hive.auto.convert.sortmerge.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000000000;
+set hive.map.join.conversion.factor=0.99f;
 set hive.optimize.reducededuplication.min.reducer=1;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
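At the other end of the range, note what happens when an old threshold exceeds the assumed executor size: smb_mapjoin_25.q's 10000000000 and mapjoin_hint.q's 100000000000 both map to 0.99f rather than to their raw ratios, presumably because the factor is a fraction of executor memory and has to stay below 1.0. The raw ratios overshoot, as a quick check shows (same assumed 1 GiB divisor as before):

-- Raw ratios for the two largest old thresholds; both exceed 1.0, so the
-- patch pins them just under the top of the factor's range at 0.99f.
select 10000000000  / 1073741824.0,  -- ~9.3
       100000000000 / 1073741824.0;  -- ~93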
distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask = true; -set hive.auto.convert.join.noconditionaltask.size = 10000000; +set hive.map.join.conversion.factor=0.0093f; -- single column, single key EXPLAIN select count(*) from srcpart join srcpart_date_n4 on (srcpart.ds = srcpart_date_n4.ds) where srcpart_date_n4.`date` = '2008-04-08'; diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_2.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_2.q index 532a50a..65b08fc 100644 --- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_2.q +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_2.q @@ -6,8 +6,7 @@ set hive.spark.dynamic.partition.pruning=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask = true; -set hive.auto.convert.join.noconditionaltask.size = 10000000; +set hive.map.join.conversion.factor=0.0093f; -- SORT_QUERY_RESULTS @@ -133,4 +132,4 @@ select count(*) from srcpart s1, srcpart s2 where s1.ds = s2.ds -; \ No newline at end of file +; diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q index 4a245f9..62a1067 100644 --- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_3.q @@ -54,8 +54,7 @@ INSERT INTO TABLE partitioned_table5 PARTITION (part_col1 = 3, part_col2 = 3) VA SET hive.spark.dynamic.partition.pruning.map.join.only=true; SET hive.strict.checks.cartesian.product=false; -SET hive.auto.convert.join=true; -SET hive.auto.convert.join.noconditionaltask.size=15; -- ensure the partitioned tables are treated as big tables +set hive.map.join.conversion.factor=0.000000014f; -- ensure the partitioned tables are treated as big tables -- left semi join where the subquery is a join between a partitioned and a non-partitioned table EXPLAIN SELECT count(*) FROM partitioned_table1 WHERE partitioned_table1.part_col IN ( @@ -139,7 +138,7 @@ WHERE partitioned_table1.part_col = partitioned_table2.part_col AND partitioned_table2.col > 1; -- dpp betwen two partitioned tables, both with multiple partition columns -SET hive.auto.convert.join.noconditionaltask.size=150; -- set auto convert size to a higher value so map-joins are triggered for the partitioned tables +set hive.map.join.conversion.factor=0.00000014f; -- set auto convert size to a higher value so map-joins are triggered for the partitioned tables EXPLAIN SELECT count(*) FROM partitioned_table4, partitioned_table5 WHERE partitioned_table4.part_col1 = partitioned_table5.part_col1 AND @@ -163,7 +162,7 @@ FROM partitioned_table1 JOIN regular_table2 ON regular_table1.col1 = regular_table2.col1; -SET hive.auto.convert.join.noconditionaltask.size=15; -- reset auto convert size to previous value +set hive.map.join.conversion.factor=0.000000014f; -- reset auto convert size to previous value -- three-way join where the partitioned table is the smallest table -- disabled until HIVE-17225 is fixed diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_5.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_5.q 
index d081419..6592625 100644 --- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_5.q +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_5.q @@ -18,9 +18,9 @@ explain select * from src join part1_n0 on src.key=part1_n0.p join part2_n3 on s -- nested DPP is removed, upper most DPP is w/ map join set hive.auto.convert.join=true; -- ensure regular1 is treated as small table, and partitioned tables are not -set hive.auto.convert.join.noconditionaltask.size=20; +set hive.map.join.conversion.factor=0.000000019f; explain select * from regular1 join part1_n0 on regular1.key=part1_n0.p join part2_n3 on regular1.value=part2_n3.p; drop table part1_n0; drop table part2_n3; -drop table regular1; \ No newline at end of file +drop table regular1; diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_mapjoin_only.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_mapjoin_only.q index ba44aef..cabe47b 100644 --- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_mapjoin_only.q +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_mapjoin_only.q @@ -13,8 +13,7 @@ create table srcpart2 as select * from srcpart; -- enable map join and set the size to be small so that only join with srcpart_date_n3 gets to be a -- map join -set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask.size=100; +set hive.map.join.conversion.factor=0.000000093f; -- checking with dpp disabled -- expectation: 2 spark jobs diff --git a/ql/src/test/queries/clientpositive/spark_explainuser_1.q b/ql/src/test/queries/clientpositive/spark_explainuser_1.q index 2718329..dcd4dd7 100644 --- a/ql/src/test/queries/clientpositive/spark_explainuser_1.q +++ b/ql/src/test/queries/clientpositive/spark_explainuser_1.q @@ -390,8 +390,7 @@ drop table sales; drop table things; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; set hive.stats.fetch.column.stats=false; set hive.mapjoin.optimized.hashtable=false; diff --git a/ql/src/test/queries/clientpositive/spark_use_ts_stats_for_mapjoin.q b/ql/src/test/queries/clientpositive/spark_use_ts_stats_for_mapjoin.q index 5ee09fc..7dc5dc0 100644 --- a/ql/src/test/queries/clientpositive/spark_use_ts_stats_for_mapjoin.q +++ b/ql/src/test/queries/clientpositive/spark_use_ts_stats_for_mapjoin.q @@ -3,7 +3,7 @@ set hive.mapred.mode=nonstrict; set hive.auto.convert.join=true; set hive.spark.use.ts.stats.for.mapjoin=true; -set hive.auto.convert.join.noconditionaltask.size=4000; +set hive.map.join.conversion.factor=0.0000037f; -- SORT_QUERY_RESULTS EXPLAIN @@ -15,7 +15,7 @@ SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) WHERE src1.key = 97; -set hive.auto.convert.join.noconditionaltask.size=8000; +set hive.map.join.conversion.factor=0.0000075f; -- This is copied from auto_join2. 
Without the configuration both joins are mapjoins, -- but with the configuration on, Hive should not turn the second join into mapjoin since it @@ -35,7 +35,7 @@ SELECT sum(hash(dest.key,dest.value)) FROM dest; -- Test for HIVE-16698, for the case of UNION + MAPJOIN -set hive.auto.convert.join.noconditionaltask.size=16; +set hive.map.join.conversion.factor=0.000000015f; CREATE TABLE a (c1 STRING, c2 INT); CREATE TABLE b (c3 STRING, c4 INT); diff --git a/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q index 22bb502..b71d666 100644 --- a/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q +++ b/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q @@ -130,8 +130,7 @@ EXPLAIN VECTORIZATION DETAIL select ds from (select distinct(ds) as ds from srcp select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask = true; -set hive.auto.convert.join.noconditionaltask.size = 10000000; +set hive.map.join.conversion.factor=0.0093f; -- single column, single key EXPLAIN VECTORIZATION DETAIL select count(*) from srcpart join srcpart_date_n0 on (srcpart.ds = srcpart_date_n0.ds) where srcpart_date_n0.`date` = '2008-04-08'; diff --git a/ql/src/test/queries/clientpositive/subq_where_serialization.q b/ql/src/test/queries/clientpositive/subq_where_serialization.q index de68c8a..d97808a 100644 --- a/ql/src/test/queries/clientpositive/subq_where_serialization.q +++ b/ql/src/test/queries/clientpositive/subq_where_serialization.q @@ -1,7 +1,7 @@ --! 
qt:dataset:src set hive.mapred.mode=nonstrict; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask.size=10000000; +set hive.map.join.conversion.factor=0.0093f; explain select src.key from src where src.key in ( select distinct key from src); set hive.auto.convert.join=false; diff --git a/ql/src/test/queries/clientpositive/tez-tag.q b/ql/src/test/queries/clientpositive/tez-tag.q index b50eed9..141f09b 100644 --- a/ql/src/test/queries/clientpositive/tez-tag.q +++ b/ql/src/test/queries/clientpositive/tez-tag.q @@ -17,8 +17,7 @@ set hive.tez.min.bloom.filter.entries=1; set hive.stats.fetch.column.stats=true; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; CREATE TABLE srcbucket_mapjoin_n9(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; @@ -48,7 +47,7 @@ select key,value from srcbucket_mapjoin_n9; set hive.convert.join.bucket.mapjoin.tez = true; set hive.auto.convert.sortmerge.join = true; -set hive.auto.convert.join.noconditionaltask.size=0; +set hive.map.join.conversion.factor=0.0f; set hive.mapjoin.hybridgrace.minwbsize=125; set hive.mapjoin.hybridgrace.minnumpartitions=4; diff --git a/ql/src/test/queries/clientpositive/tez_bmj_schema_evolution.q b/ql/src/test/queries/clientpositive/tez_bmj_schema_evolution.q index de89c27..0287995 100644 --- a/ql/src/test/queries/clientpositive/tez_bmj_schema_evolution.q +++ b/ql/src/test/queries/clientpositive/tez_bmj_schema_evolution.q @@ -4,7 +4,7 @@ set hive.explain.user=false; set hive.optimize.bucketingsorting=false; -set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.map.join.conversion.factor=0.00001f; create table test_n1 (key int, value string) partitioned by (p int) clustered by (key) into 2 buckets stored as textfile; create table test1 (key int, value string) stored as textfile; diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q index 47c0038..76b4247 100644 --- a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q +++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q @@ -56,7 +56,7 @@ order by a.csmallint; set hive.auto.convert.join=true; set hive.optimize.dynamic.partition.hashjoin=true; -set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.map.join.conversion.factor=0.000019f; set hive.exec.reducers.bytes.per.reducer=20000; set hive.stats.fetch.column.stats=false; -- Try with dynamically partitioned hashjoin diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_2.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_2.q index 8076593..b2f3c18 100644 --- a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_2.q +++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_2.q @@ -33,7 +33,7 @@ order by a.csmallint, a.ctinyint, a.cint; set hive.auto.convert.join=true; set hive.optimize.dynamic.partition.hashjoin=true; -set hive.auto.convert.join.noconditionaltask.size=2000; +set hive.map.join.conversion.factor=0.0000019f; set hive.exec.reducers.bytes.per.reducer=200000; -- noconditionaltask.size needs to be low enough that entire filtered table results do not fit in one task's hash table diff --git 
a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q index 10e982e..1de3c86 100644 --- a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q +++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -16,7 +16,7 @@ limit 1; set hive.auto.convert.join=true; set hive.optimize.dynamic.partition.hashjoin=true; -set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.map.join.conversion.factor=0.000019f; set hive.exec.reducers.bytes.per.reducer=2000; explain diff --git a/ql/src/test/queries/clientpositive/tez_fixed_bucket_pruning.q b/ql/src/test/queries/clientpositive/tez_fixed_bucket_pruning.q index cbc3997..05f3922 100644 --- a/ql/src/test/queries/clientpositive/tez_fixed_bucket_pruning.q +++ b/ql/src/test/queries/clientpositive/tez_fixed_bucket_pruning.q @@ -158,7 +158,7 @@ analyze table l3_monthly_dw_dimplan compute statistics for columns; set hive.explain.user=false; set hive.auto.convert.join=true; set hive.vectorized.execution.enabled=true; -set hive.auto.convert.join.noconditionaltask.size=200000000; +set hive.map.join.conversion.factor=0.19f; set hive.optimize.index.filter=true; -- fixed bucket pruning off diff --git a/ql/src/test/queries/clientpositive/tez_join_hash.q b/ql/src/test/queries/clientpositive/tez_join_hash.q index c581895..3c8e2f7 100644 --- a/ql/src/test/queries/clientpositive/tez_join_hash.q +++ b/ql/src/test/queries/clientpositive/tez_join_hash.q @@ -8,7 +8,7 @@ create table orc_src (key string, value string) STORED AS ORC; insert into table orc_src select * from src; set hive.vectorized.execution.enabled=true; -set hive.auto.convert.join.noconditionaltask.size=1; +set hive.map.join.conversion.factor=0.00000000093f; set hive.exec.reducers.bytes.per.reducer=20000; explain @@ -17,8 +17,7 @@ SELECT count(*) FROM src, orc_src where src.key=orc_src.key; SELECT count(*) FROM src, orc_src where src.key=orc_src.key; set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=3000; +set hive.map.join.conversion.factor=0.0000028f; set hive.mapjoin.hybridgrace.minwbsize=350; set hive.mapjoin.hybridgrace.minnumpartitions=8; diff --git a/ql/src/test/queries/clientpositive/tez_join_result_complex.q b/ql/src/test/queries/clientpositive/tez_join_result_complex.q index a77c44d..568c739 100644 --- a/ql/src/test/queries/clientpositive/tez_join_result_complex.q +++ b/ql/src/test/queries/clientpositive/tez_join_result_complex.q @@ -1,8 +1,7 @@ --! 
 --! qt:dataset:src
 SET hive.vectorized.execution.enabled=false;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 set hive.mapjoin.optimized.hashtable=true;
 set hive.mapred.mode=nonstrict;
 create table service_request_clean(
diff --git a/ql/src/test/queries/clientpositive/tez_smb_1.q b/ql/src/test/queries/clientpositive/tez_smb_1.q
index e121d52..04499a1 100644
--- a/ql/src/test/queries/clientpositive/tez_smb_1.q
+++ b/ql/src/test/queries/clientpositive/tez_smb_1.q
@@ -5,8 +5,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.auto.convert.join=true;
 set hive.join.emit.interval=2;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.001f;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
 
 CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
@@ -34,7 +33,7 @@ select key,value from srcbucket_mapjoin;
 set hive.convert.join.bucket.mapjoin.tez = true;
 set hive.auto.convert.sortmerge.join = true;
-set hive.auto.convert.join.noconditionaltask.size=50;
+set hive.map.join.conversion.factor=0.000005f;
 
 explain
 select count(*) from tab s1 join tab s3 on s1.key=s3.key;
@@ -116,7 +115,7 @@ select key,value from srcbucket_mapjoin;
 set hive.convert.join.bucket.mapjoin.tez = true;
 set hive.auto.convert.sortmerge.join = true;
-set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.map.join.conversion.factor=0.00005f;
 set test.comment=SMB disabled for external tables;
 set test.comment;
 explain
diff --git a/ql/src/test/queries/clientpositive/tez_smb_empty.q b/ql/src/test/queries/clientpositive/tez_smb_empty.q
index ffe30cc..9ba4c2d 100644
--- a/ql/src/test/queries/clientpositive/tez_smb_empty.q
+++ b/ql/src/test/queries/clientpositive/tez_smb_empty.q
@@ -4,8 +4,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.mapjoin.hybridgrace.hashtable=false;
 set hive.join.emit.interval=2;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.001f;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
 
 -- SORT_QUERY_RESULTS
@@ -34,7 +33,7 @@ select key,value from srcbucket_mapjoin_n7;
 
 set hive.auto.convert.sortmerge.join = true;
-set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.map.join.conversion.factor=0.00005f;
 
 CREATE TABLE empty_n0(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
 explain
diff --git a/ql/src/test/queries/clientpositive/tez_smb_main.q b/ql/src/test/queries/clientpositive/tez_smb_main.q
index 3a75d61..d6766c2 100644
--- a/ql/src/test/queries/clientpositive/tez_smb_main.q
+++ b/ql/src/test/queries/clientpositive/tez_smb_main.q
@@ -12,8 +12,7 @@ select * from src a join src1 b on a.key = b.key;
 select * from src a join src1 b on a.key = b.key;
 
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.001f;
 set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
 
 CREATE TABLE srcbucket_mapjoin_n19(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
@@ -48,7 +47,7 @@ from tab_n11 a join tab_part_n12 b on a.key = b.key;
 select count(*) from tab_n11 a join tab_part_n12 b on a.key = b.key;
 
-set hive.auto.convert.join.noconditionaltask.size=4000;
+set hive.map.join.conversion.factor=0.0004f;
 set hive.mapjoin.hybridgrace.minwbsize=500;
 set hive.mapjoin.hybridgrace.minnumpartitions=4;
 explain
@@ -59,7 +58,7 @@ select count(*) from tab_n11 a join tab_part_n12 b on a.key = b.key;
 
 set hive.stats.fetch.column.stats=false;
-set hive.auto.convert.join.noconditionaltask.size=4000;
+set hive.map.join.conversion.factor=0.0004f;
 set hive.mapjoin.hybridgrace.minwbsize=250;
 set hive.mapjoin.hybridgrace.minnumpartitions=4;
 explain
@@ -69,7 +68,7 @@ from tab_n11 a join tab_part_n12 b on a.key = b.key;
 select count(*) from tab_n11 a join tab_part_n12 b on a.key = b.key;
 
-set hive.auto.convert.join.noconditionaltask.size=800;
+set hive.map.join.conversion.factor=0.00008f;
 set hive.mapjoin.hybridgrace.minwbsize=125;
 set hive.mapjoin.hybridgrace.minnumpartitions=4;
 set hive.llap.memory.oversubscription.max.executors.per.query=0;
@@ -90,7 +89,7 @@ UNION ALL
 select s2.key as key, s2.value as value from tab_n11 s2
 ) a join tab_part_n12 b on (a.key = b.key);
 
-set hive.auto.convert.join.noconditionaltask.size=20000;
+set hive.map.join.conversion.factor=0.002f;
 set hive.llap.memory.oversubscription.max.executors.per.query=0;
 explain
 select count(*) from tab_n11 a join tab_part_n12 b on a.value = b.value;
@@ -110,7 +109,7 @@ UNION ALL
 select s2.key as key, s2.value as value from tab_n11 s2
 ) a join tab_part_n12 b on (a.key = b.key);
 
-set hive.auto.convert.join.noconditionaltask.size=5000;
+set hive.map.join.conversion.factor=0.0005f;
 explain
 select count(*) from
diff --git a/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_1.q b/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_1.q
index ecb2249..00a3c9a 100644
--- a/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_1.q
+++ b/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_1.q
@@ -57,7 +57,7 @@ order by a.csmallint;
 set hive.auto.convert.join=true;
 set hive.optimize.dynamic.partition.hashjoin=true;
-set hive.auto.convert.join.noconditionaltask.size=200000;
+set hive.map.join.conversion.factor=0.00019f;
 set hive.exec.reducers.bytes.per.reducer=200000;
 set hive.vectorized.execution.enabled=true;
 set hive.stats.fetch.column.stats=false;
diff --git a/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_2.q b/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_2.q
index 04d6829..287e366 100644
--- a/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_2.q
+++ b/ql/src/test/queries/clientpositive/tez_vector_dynpart_hashjoin_2.q
@@ -33,7 +33,7 @@ order by a.csmallint, a.ctinyint, a.cint;
 set hive.auto.convert.join=true;
 set hive.optimize.dynamic.partition.hashjoin=true;
-set hive.auto.convert.join.noconditionaltask.size=2000;
+set hive.map.join.conversion.factor=0.0000019f;
 set hive.exec.reducers.bytes.per.reducer=200000;
 set hive.vectorized.execution.enabled=true;
diff --git a/ql/src/test/queries/clientpositive/union22.q b/ql/src/test/queries/clientpositive/union22.q
index 5d41351..57c09ea 100644
--- a/ql/src/test/queries/clientpositive/union22.q
+++ b/ql/src/test/queries/clientpositive/union22.q
@@ -14,8 +14,7 @@ select key, key, value, key, value, value from src;
 set hive.merge.mapfiles=false;
 
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 
 -- Since the inputs are small, it should be automatically converted to mapjoin
diff --git a/ql/src/test/queries/clientpositive/unionDistinct_1.q b/ql/src/test/queries/clientpositive/unionDistinct_1.q
index 75c66b0..0f80fd8 100644
--- a/ql/src/test/queries/clientpositive/unionDistinct_1.q
+++ b/ql/src/test/queries/clientpositive/unionDistinct_1.q
@@ -157,8 +157,7 @@ select key, key, value, key, value, value from src;
 set hive.merge.mapfiles=false;
 
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=8000;
+set hive.map.join.conversion.factor=0.0000075f;
 
 -- Since the inputs are small, it should be automatically converted to mapjoin
@@ -310,7 +309,7 @@ set hive.stats.fetch.column.stats=false;
 
 -- SORT_QUERY_RESULTS
 
-set hive.auto.convert.join.noconditionaltask.size=20000;
+set hive.map.join.conversion.factor=0.000019f;
 
 EXPLAIN
 SELECT
diff --git a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
index f7a14e7..9db9b1a 100644
--- a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
+++ b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
@@ -1,8 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
diff --git a/ql/src/test/queries/clientpositive/vector_char_mapjoin1.q b/ql/src/test/queries/clientpositive/vector_char_mapjoin1.q
index f5c05a4..372f27b 100644
--- a/ql/src/test/queries/clientpositive/vector_char_mapjoin1.q
+++ b/ql/src/test/queries/clientpositive/vector_char_mapjoin1.q
@@ -3,8 +3,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 
 drop table if exists char_join1_vc1;
 drop table if exists char_join1_vc2;
diff --git a/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q b/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
index ef769fb..fd03bc6 100644
--- a/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
+++ b/ql/src/test/queries/clientpositive/vector_decimal_mapjoin.q
@@ -3,8 +3,7 @@ set hive.stats.column.autogather=false;
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
diff --git a/ql/src/test/queries/clientpositive/vector_full_outer_join.q b/ql/src/test/queries/clientpositive/vector_full_outer_join.q
index cc77488..394c884 100644
--- a/ql/src/test/queries/clientpositive/vector_full_outer_join.q
+++ b/ql/src/test/queries/clientpositive/vector_full_outer_join.q
@@ -3,8 +3,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.fetch.task.conversion=none;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 
 -- SORT_QUERY_RESULTS
diff --git a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q
index 1685f35..8740488 100644
--- a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q
+++ b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q
@@ -5,7 +5,6 @@ set hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;
 set hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
 set hive.stats.fetch.column.stats=false;
 
 ------------------------------------------------------------------------------------------
@@ -24,7 +23,7 @@ set hive.mapjoin.hybridgrace.hashtable=false;
 -- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage
 -- NOTE: of DYNAMIC PARTITION HASH JOIN instead.
-set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.map.join.conversion.factor=0.00000047f;
 set hive.exec.reducers.bytes.per.reducer=500;
 
 ------------------------------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q
index 8b59266..8989c39 100644
--- a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q
+++ b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q
@@ -5,7 +5,6 @@ set hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false;
 set hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
 set hive.stats.fetch.column.stats=false;
 
 ------------------------------------------------------------------------------------------
@@ -24,7 +23,7 @@ set hive.mapjoin.hybridgrace.hashtable=false;
 -- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage
 -- NOTE: of DYNAMIC PARTITION HASH JOIN instead.
-set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.map.join.conversion.factor=0.00000047f;
 set hive.exec.reducers.bytes.per.reducer=500;
 
 ------------------------------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q
index 869668e..a8d2ce5 100644
--- a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q
+++ b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q
@@ -5,7 +5,6 @@ set hive.vectorized.execution.mapjoin.native.enabled=false;
 set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false;
 set hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
 set hive.stats.fetch.column.stats=false;
 
 ------------------------------------------------------------------------------------------
@@ -24,7 +23,7 @@ set hive.mapjoin.hybridgrace.hashtable=false;
 -- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage
 -- NOTE: of DYNAMIC PARTITION HASH JOIN instead.
-set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.map.join.conversion.factor=0.00000047f;
 set hive.exec.reducers.bytes.per.reducer=500;
 
 ------------------------------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q b/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
index b1d3f40..9b10141 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
@@ -4,8 +4,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled = true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 set hive.exec.dynamic.partition.mode=nonstrict;
 
 -- HIVE-12738
 -- We are checking if a MapJoin after a GroupBy will work properly.
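The tiny factors in the three fullouter variants above play the same role as the old 500-byte threshold: they keep the shared-memory map join budget below any realistic input so the dynamic partition hash join path is exercised. The check they tune reduces to a size-versus-memory comparison; a minimal sketch assuming the documented semantics of hive.map.join.conversion.factor (the class, method, and parameter names are illustrative, not the patch's actual code):

    // Illustrative decision helper mirroring the documented behavior:
    // convert to map join when the build side fits under factor * executor memory,
    // and treat factor <= 0 as "conversion disabled".
    public final class MapJoinDecisionSketch {
      static boolean convertsToMapJoin(long buildSideBytes,
                                       long executorMemoryBytes,
                                       float factor) {
        if (factor <= 0.0f) {
          return false; // conversion disabled outright
        }
        return buildSideBytes < (long) (factor * executorMemoryBytes);
      }

      public static void main(String[] args) {
        long oneGiB = 1L << 30;
        // With factor=0.00000047f the budget is roughly 500 bytes, so almost
        // nothing converts and the DPHJ path is forced instead.
        System.out.println(convertsToMapJoin(10_000L, oneGiB, 0.00000047f)); // false
        System.out.println(convertsToMapJoin(100L, oneGiB, 0.00000047f));    // true
      }
    }

Under an assumed 1 GiB executor, 0.00000047f works out to about 500 bytes, matching the old setting these hunks replace.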
diff --git a/ql/src/test/queries/clientpositive/vector_include_no_sel.q b/ql/src/test/queries/clientpositive/vector_include_no_sel.q
index e68db1f..95a8e49 100644
--- a/ql/src/test/queries/clientpositive/vector_include_no_sel.q
+++ b/ql/src/test/queries/clientpositive/vector_include_no_sel.q
@@ -3,8 +3,7 @@ SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.reducesink.new.enabled=false;
 set hive.cbo.enable=false;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 SET hive.mapred.mode=nonstrict;
 set hive.fetch.task.conversion=none;
diff --git a/ql/src/test/queries/clientpositive/vector_join30.q b/ql/src/test/queries/clientpositive/vector_join30.q
index 74c4433..a9248d2 100644
--- a/ql/src/test/queries/clientpositive/vector_join30.q
+++ b/ql/src/test/queries/clientpositive/vector_join30.q
@@ -4,8 +4,7 @@ SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 
 -- SORT_QUERY_RESULTS
diff --git a/ql/src/test/queries/clientpositive/vector_join_filters.q b/ql/src/test/queries/clientpositive/vector_join_filters.q
index b9f3740..1ee273c 100644
--- a/ql/src/test/queries/clientpositive/vector_join_filters.q
+++ b/ql/src/test/queries/clientpositive/vector_join_filters.q
@@ -3,8 +3,7 @@ SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 
 -- SORT_QUERY_RESULTS
@@ -50,4 +49,4 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n1 a RIGHT OUTER JOI
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b RIGHT OUTER JOIN myinput1_n1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
 SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1_n1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
-SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b RIGHT OUTER JOIN myinput1_n1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
\ No newline at end of file
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b RIGHT OUTER JOIN myinput1_n1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
diff --git a/ql/src/test/queries/clientpositive/vector_join_nulls.q b/ql/src/test/queries/clientpositive/vector_join_nulls.q
index f87dc44..96e5571 100644
--- a/ql/src/test/queries/clientpositive/vector_join_nulls.q
+++ b/ql/src/test/queries/clientpositive/vector_join_nulls.q
@@ -3,8 +3,7 @@ SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 
 -- SORT_QUERY_RESULTS
diff --git a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
index 84f656b..5df6f6a 100644
--- a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
+++ b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -3,8 +3,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.fetch.task.conversion=none;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 
 -- SORT_QUERY_RESULTS
@@ -64,4 +63,4 @@ SET hive.vectorized.execution.mapjoin.native.enabled=true;
 explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
 
-select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
\ No newline at end of file
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
diff --git a/ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q b/ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
index f9b4222..d004c5b 100644
--- a/ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
+++ b/ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q
@@ -2,8 +2,7 @@ set hive.explain.user=false;
 set hive.fetch.task.conversion=none;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 SET hive.merge.nway.joins=false;
 
 -- SORT_QUERY_RESULTS
diff --git a/ql/src/test/queries/clientpositive/vector_llap_text_1.q b/ql/src/test/queries/clientpositive/vector_llap_text_1.q
index f5c805e..c3fba24 100644
--- a/ql/src/test/queries/clientpositive/vector_llap_text_1.q
+++ b/ql/src/test/queries/clientpositive/vector_llap_text_1.q
@@ -7,8 +7,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 
 CREATE TABLE srcbucket_mapjoin_n13(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
 CREATE TABLE tab_part_n8 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join5.q b/ql/src/test/queries/clientpositive/vector_outer_join5.q
index 19aefdc..48d8646 100644
--- a/ql/src/test/queries/clientpositive/vector_outer_join5.q
+++ b/ql/src/test/queries/clientpositive/vector_outer_join5.q
@@ -4,8 +4,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.map.join.conversion.factor=0.00001f;
 set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
@@ -174,4 +173,4 @@
 left outer join small_table2 sm on pmod(sm.cbigint, 8) = s.cmodint
 left outer join mod_8_mod_4 s2 on s2.cmodtinyint = s.cmodtinyint
-) t1;
\ No newline at end of file
+) t1;
diff --git a/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q b/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q
index c82c960..8ad9f20 100644
--- a/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q
+++ b/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q
@@ -4,8 +4,7 @@ set hive.vectorized.execution.enabled=true;
 set hive.vectorized.execution.reduce.enabled=true;
 set hive.vectorized.execution.reducesink.new.enabled=false;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 set hive.fetch.task.conversion=none;
 set hive.strict.checks.cartesian.product=false;
 set hive.cbo.enable=false;
@@ -26,4 +25,4 @@
 select one as one_0, two, one as one_1
 from demo a join (select 1 as one, 2 as two) b
 on a.one = b.one and a.two = b.two
-group by a.one, a.two, a.one;
\ No newline at end of file
+group by a.one, a.two, a.one;
diff --git a/ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q b/ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
index 285d2ac..4317ddf 100644
--- a/ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
+++ b/ql/src/test/queries/clientpositive/vector_varchar_mapjoin1.q
@@ -3,8 +3,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 
 drop table if exists varchar_join1_vc1;
 drop table if exists varchar_join1_vc2;
diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
index 03e3d7b..822f28d 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
@@ -118,8 +118,7 @@ EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart uni
 select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
 
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask = true;
-set hive.auto.convert.join.noconditionaltask.size = 10000000;
+set hive.map.join.conversion.factor=0.0093f;
 
 -- single column, single key
 EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_n8 on (srcpart.ds = srcpart_date_n8.ds) where srcpart_date_n8.`date` = '2008-04-08';
diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin.q
index 0b4e65d..c58349c 100644
--- a/ql/src/test/queries/clientpositive/vectorized_mapjoin.q
+++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin.q
@@ -3,8 +3,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
@@ -15,4 +14,4 @@ EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.ci
 
 SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
 FROM alltypesorc t1
-  JOIN alltypesorc t2 ON t1.cint = t2.cint;
\ No newline at end of file
+  JOIN alltypesorc t2 ON t1.cint = t2.cint;
diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q
index d259547..465e9da 100644
--- a/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q
+++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q
@@ -4,8 +4,7 @@ SET hive.vectorized.execution.mapjoin.native.enabled=true;
 set hive.cbo.enable=true;
 set hive.fetch.task.conversion=none;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 set hive.exec.dynamic.partition.mode=nonstrict;
 set hive.mapjoin.hybridgrace.hashtable=false;
 set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;
diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q
index c216499..6e61d6a 100644
--- a/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q
+++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q
@@ -2,7 +2,7 @@ set hive.vectorized.execution.enabled=true;
 set hive.cbo.enable=true;
 set hive.stats.column.autogather=true;
-set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.map.join.conversion.factor=0.0093f;
 set hive.auto.convert.join=true;
 
 create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc;
diff --git a/ql/src/test/queries/clientpositive/vectorized_multi_output_select.q b/ql/src/test/queries/clientpositive/vectorized_multi_output_select.q
index 75a85c7..d7ded0e 100644
--- a/ql/src/test/queries/clientpositive/vectorized_multi_output_select.q
+++ b/ql/src/test/queries/clientpositive/vectorized_multi_output_select.q
@@ -1,7 +1,7 @@
 --! qt:dataset:src1
 --! qt:dataset:src
 set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=3000;
+set hive.map.join.conversion.factor=0.0000028f;
 set hive.strict.checks.cartesian.product=false;
 set hive.merge.nway.joins=false;
 set hive.vectorized.execution.enabled=true;
diff --git a/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q b/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q
index a70ba02..bbdccfb 100644
--- a/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q
+++ b/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q
@@ -3,8 +3,7 @@ set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.map.join.conversion.factor=0.93f;
 set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
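Taken together, the q-file changes above collapse two knobs (the boolean noconditionaltask switch and its byte-size threshold) into a single memory-relative factor. For scripts that generate such settings, a hypothetical sanity check; the helper name is an illustration, and the [0.0, 1.0] bound mirrors the range declared for the new property:

    // Hypothetical helper for emitting a q-file line; rejects values outside the
    // property's declared [0.0, 1.0] range. Remember that 0.0f means "never
    // convert", not "zero-byte threshold".
    public final class FactorLineSketch {
      static String factorSetLine(float factor) {
        if (factor < 0.0f || factor > 1.0f) {
          throw new IllegalArgumentException("factor out of range [0.0, 1.0]: " + factor);
        }
        return "set hive.map.join.conversion.factor=" + factor + "f;";
      }

      public static void main(String[] args) {
        System.out.println(factorSetLine(0.93f));
      }
    }

For example, factorSetLine(0.93f) yields the "set hive.map.join.conversion.factor=0.93f;" line used by several of the vectorized tests above.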