Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1448004)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -447,6 +447,11 @@
     //small table file size
     HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize",25000000L), //25M
 
+    // ignore mapjoin hint if the table is not sorted/bucketed
+    // After HIVE-3784, the mapjoin hint is only needed for bucketed/sorted tables.
+    // If this conf. is set, process the query as if the user has not specified the hint
+    HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", false),
+
     // random number for split sampling
     HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0),
 
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 1448004)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -60,6 +60,7 @@
 import org.apache.hadoop.hive.ql.exec.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
@@ -6602,8 +6603,73 @@
         }
       }
     }
+
+    removeMapJoinHint(qb);
   }
 
+  /**
+   * Tells whether the mapjoin hint on a join should be dropped because it cannot help.
+   * After HIVE-3784, the mapjoin hint is only needed for bucketed/sorted tables, so the
+   * hint is dropped as soon as one input is found not to be a scan of a bucketed table.
+   *
+   * @param joinTree the join whose inputs are inspected
+   * @return true if at least one input is not a scan of a bucketed table (reached directly
+   *         or through filters and selects only); false if every input is such a scan
+   */
+  private boolean isRemoveMapJoin(QBJoinTree joinTree) {
+    Map<String, Operator<? extends OperatorDesc>> aliasToOpInfo = joinTree.getAliasToOpInfo();
+    for (Operator<? extends OperatorDesc> op : aliasToOpInfo.values()) {
+      // Only selects and filters are allowed between a table scan and the mapjoin, so walk
+      // up through them to reach the operator that actually feeds this input.
+      Operator<? extends OperatorDesc> source = op;
+      while (((source instanceof FilterOperator) || (source instanceof SelectOperator)) &&
+          (source.getParentOperators() != null) &&
+          !source.getParentOperators().isEmpty()) {
+        source = source.getParentOperators().get(0);
+      }
+
+      if (!(source instanceof TableScanOperator)) {
+        return true;
+      }
+
+      // A table that is unknown here or reports no buckets gains nothing from the hint.
+      Table tbl = topToTable.get(source);
+      if ((tbl == null) || (tbl.getNumBuckets() <= 0)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Drops mapjoin hints that are of no use, but only when hive.ignore.mapjoin.hint is set.
+   * After HIVE-3784, a lot of mapjoin queries are not allowed; the only remaining reason
+   * for the hint is bucketed/sorted data. Rejecting those queries may create a major problem
+   * in deployment, so as a temporary measure, until hive.auto.convert.sortmerge.join is
+   * stabilized, such joins are processed as if the user never gave the hint.
+   *
+   * @param qb the query block whose chain of join trees is walked
+   */
+  private void removeMapJoinHint(QB qb) {
+    if (!conf.getBoolVar(HiveConf.ConfVars.HIVEIGNOREMAPJOINHINT)) {
+      // The user has not asked for hints to be ignored, so leave the join trees untouched.
+      return;
+    }
+
+    QBJoinTree currentNode = qb.getQbJoinTree();
+    while (currentNode != null) {
+      QBJoinTree childCurrentNode = currentNode.getJoinSrc();
+      // A node with a non-null join source always loses the hint; any other node loses it
+      // unless every one of its inputs is a scan of a bucketed table.
+      if (currentNode.isMapSideJoin() &&
+          ((childCurrentNode != null) || isRemoveMapJoin(currentNode))) {
+        currentNode.setMapSideJoin(false);
+        currentNode.setMapAliases(null);
+      }
+      currentNode = childCurrentNode;
+    }
+  }
+
   private Operator insertSelectAllPlanForGroupBy(Operator input)
       throws SemanticException {
     OpParseContext inputCtx = opParseCtx.get(input);