diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 6a1a5f0..ce93c47 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1314,10 +1314,11 @@ HIVEOUTERJOINSUPPORTSFILTERS("hive.outerjoin.supports.filters", true, ""), - HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "more", new StringSet("minimal", "more"), + HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "more", new StringSet("none", "minimal", "more"), "Some select queries can be converted to single FETCH task minimizing latency.\n" + "Currently the query should be single sourced not having any subquery and should not have\n" + "any aggregations or distincts (which incurs RS), lateral views and joins.\n" + + "0. none : disable hive.fetch.task.conversion\n" + "1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only\n" + "2. more : SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns)" ), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java index 683618f..960fc1d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java @@ -182,6 +182,29 @@ private ExprNodeDesc analyzeExpr( } ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0]; ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1]; + // We may need to peel off the GenericUDFBridge that is added by CBO or user + boolean peelOffGenericUDFBridge = false; + while (expr1 instanceof ExprNodeGenericFuncDesc && expr2 instanceof ExprNodeGenericFuncDesc) { + GenericUDF udf1 = ((ExprNodeGenericFuncDesc) expr1).getGenericUDF(); + GenericUDF udf2 = ((ExprNodeGenericFuncDesc) expr2).getGenericUDF(); + // We assume the wrapper was added by CBO or the user when both sides report the same
UDF name; + // names are compared by value (not by reference) and a null name never matches. + if (udf1.getUdfName() != null && udf1.getUdfName().equals(udf2.getUdfName())) { + peelOffGenericUDFBridge = true; + expr1 = expr1.getChildren().get(0); + expr2 = expr2.getChildren().get(0); + } else { + break; + } + } + // We also need to update the expr so that the index query can be generated. + // Note that, hive does not support UDFToDouble in the query text. + if (peelOffGenericUDFBridge) { + List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>(); + list.add(expr1); + list.add(expr2); + expr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list); + } ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2); if (extracted == null || (extracted.length > 2 && !acceptsFields)) { return expr; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index bc4ad2f..f80d38d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -140,8 +140,10 @@ public void initialize(HiveConf hiveConf) { transformations.add(new AnnotateWithOpTraits()); } - transformations.add(new SimpleFetchOptimizer()); // must be called last - + if (!HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKCONVERSION).equals("none")) { + transformations.add(new SimpleFetchOptimizer()); // must be called last + } + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) { transformations.add(new SimpleFetchAggregation()); } diff --git a/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q b/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q index 579e940..ad14c0c 100644 --- a/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q +++ b/ql/src/test/queries/clientnegative/set_hiveconf_validation2.q @@ -1,4 +1,4 @@ --- should fail: hive.fetch.task.conversion accepts minimal or more +-- should fail: hive.fetch.task.conversion accepts none, minimal or more desc src; 
set hive.conf.validation=true; diff --git a/ql/src/test/queries/clientpositive/index_auto_partitioned.q b/ql/src/test/queries/clientpositive/index_auto_partitioned.q index 49df8f2..f9f9a1b 100644 --- a/ql/src/test/queries/clientpositive/index_auto_partitioned.q +++ b/ql/src/test/queries/clientpositive/index_auto_partitioned.q @@ -1,4 +1,5 @@ set hive.stats.dbclass=fs; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -- test automatic use of index on table with partitions diff --git a/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q b/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q index 011d77a..2c0b676 100644 --- a/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q +++ b/ql/src/test/queries/clientpositive/index_bitmap_auto_partitioned.q @@ -1,4 +1,5 @@ set hive.stats.dbclass=fs; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out b/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out index 33f9360..af004a1 100644 --- a/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out +++ b/ql/src/test/results/clientnegative/set_hiveconf_validation2.q.out @@ -1,11 +1,11 @@ -PREHOOK: query: -- should fail: hive.fetch.task.conversion accepts minimal or more +PREHOOK: query: -- should fail: hive.fetch.task.conversion accepts none, minimal or more desc src PREHOOK: type: DESCTABLE PREHOOK: Input: default@src -POSTHOOK: query: -- should fail: hive.fetch.task.conversion accepts minimal or more +POSTHOOK: query: -- should fail: hive.fetch.task.conversion accepts none, minimal or more desc src POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@src key string default value string default -Query returned non-zero code: 1, cause: 'SET hive.fetch.task.conversion=true' FAILED in validation : Invalid value.. expects one of [minimal, more]. 
+Query returned non-zero code: 1, cause: 'SET hive.fetch.task.conversion=true' FAILED in validation : Invalid value.. expects one of [none, minimal, more].