diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 62908f9..4913513 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3223,13 +3223,14 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "logger used for llap-daemons."), SPARK_USE_OP_STATS("hive.spark.use.op.stats", true, - "Whether to use operator stats to determine reducer parallelism for Hive on Spark. " - + "If this is false, Hive will use source table stats to determine reducer " - + "parallelism for all first level reduce tasks, and the maximum reducer parallelism " - + "from all parents for all the rest (second level and onward) reducer tasks."), - SPARK_USE_FILE_SIZE_FOR_MAPJOIN("hive.spark.use.file.size.for.mapjoin", false, - "If this is set to true, mapjoin optimization in Hive/Spark will use source file sizes associated " - + "with TableScan operator on the root of operator tree, instead of using operator statistics."), + "Whether to use operator stats to determine reducer parallelism for Hive on Spark.\n" + + "If this is false, Hive will use source table stats to determine reducer\n" + + "parallelism for all first level reduce tasks, and the maximum reducer parallelism\n" + + "from all parents for all the rest (second level and onward) reducer tasks."), + SPARK_USE_TS_STATS_FOR_MAPJOIN("hive.spark.use.ts.stats.for.mapjoin", false, + "If this is set to true, mapjoin optimization in Hive/Spark will use statistics from\n" + + "TableScan operators at the root of the operator tree, instead of parent ReduceSink\n" + + "operators of the Join operator."), SPARK_CLIENT_FUTURE_TIMEOUT("hive.spark.client.future.timeout", "60s", new TimeValidator(TimeUnit.SECONDS), "Timeout for requests from Hive client to remote Spark driver."), diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index d8f37ae..207f7b3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -195,7 +195,7 @@ private int convertJoinBucketMapJoin(JoinOperator joinOp, MapJoinOperator mapJoi // bigTableFound means we've encountered a table that's bigger than the // max. This table is either the big table or we cannot convert. boolean bigTableFound = false; - boolean useTsStats = context.getConf().getBoolean(HiveConf.ConfVars.SPARK_USE_FILE_SIZE_FOR_MAPJOIN.varname, false); + boolean useTsStats = context.getConf().getBoolean(HiveConf.ConfVars.SPARK_USE_TS_STATS_FOR_MAPJOIN.varname, false); boolean hasUpstreamSinks = false; // Check whether there's any upstream RS.