diff --git ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java index e66c22c..acfa428 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java @@ -28,10 +28,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -44,13 +41,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; /** * IndexPredicateAnalyzer decomposes predicates, separating the parts @@ -159,7 +155,6 @@ private ExprNodeDesc analyzeExpr( List searchConditions, Object... 
nodeOutputs) { - expr = (ExprNodeGenericFuncDesc) expr; if (FunctionRegistry.isOpAnd(expr)) { assert(nodeOutputs.length == 2); ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0]; @@ -179,35 +174,30 @@ private ExprNodeDesc analyzeExpr( residuals); } - String udfName; - if (expr.getGenericUDF() instanceof GenericUDFBridge) { - GenericUDFBridge func = (GenericUDFBridge) expr.getGenericUDF(); - udfName = func.getUdfName(); - } else { - udfName = expr.getGenericUDF().getClass().getName(); - } - if (!udfNames.contains(udfName)) { + GenericUDF genericUDF = expr.getGenericUDF(); + if (!udfNames.contains(genericUDF.getUdfName())) { return expr; } - - ExprNodeDesc child1 = extractConstant((ExprNodeDesc) nodeOutputs[0]); - ExprNodeDesc child2 = extractConstant((ExprNodeDesc) nodeOutputs[1]); - ExprNodeColumnDesc columnDesc = null; - ExprNodeConstantDesc constantDesc = null; - if ((child1 instanceof ExprNodeColumnDesc) - && (child2 instanceof ExprNodeConstantDesc)) { - // COL CONSTANT - columnDesc = (ExprNodeColumnDesc) child1; - constantDesc = (ExprNodeConstantDesc) child2; - } else if ((child2 instanceof ExprNodeColumnDesc) - && (child1 instanceof ExprNodeConstantDesc)) { - // CONSTANT COL - columnDesc = (ExprNodeColumnDesc) child2; - constantDesc = (ExprNodeConstantDesc) child1; + if (!(genericUDF instanceof GenericUDFBaseCompare)) { + return expr; } - if (columnDesc == null) { + ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0]; + ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1]; + ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2); + if (extracted == null) { return expr; } + if (extracted.length > 2) { + genericUDF = genericUDF.flip(); + if (!udfNames.contains(genericUDF.getUdfName())) { + return expr; + } + } + + String udfName = genericUDF.getUdfName(); + + ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) extracted[0]; + ExprNodeConstantDesc constantDesc = (ExprNodeConstantDesc) extracted[1]; if (allowedColumnNames != null) { if 
(!allowedColumnNames.contains(columnDesc.getColumn())) { return expr; @@ -225,55 +215,6 @@ private ExprNodeDesc analyzeExpr( return null; } - private ExprNodeDesc extractConstant(ExprNodeDesc expr) { - if (!(expr instanceof ExprNodeGenericFuncDesc)) { - return expr; - } - ExprNodeConstantDesc folded = foldConstant(((ExprNodeGenericFuncDesc) expr)); - return folded == null ? expr : folded; - } - - private ExprNodeConstantDesc foldConstant(ExprNodeGenericFuncDesc func) { - GenericUDF udf = func.getGenericUDF(); - if (!FunctionRegistry.isDeterministic(udf) || FunctionRegistry.isStateful(udf)) { - return null; - } - try { - // If the UDF depends on any external resources, we can't fold because the - // resources may not be available at compile time. - if (udf instanceof GenericUDFBridge) { - UDF internal = ReflectionUtils.newInstance(((GenericUDFBridge) udf).getUdfClass(), null); - if (internal.getRequiredFiles() != null || internal.getRequiredJars() != null) { - return null; - } - } else { - if (udf.getRequiredFiles() != null || udf.getRequiredJars() != null) { - return null; - } - } - - for (ExprNodeDesc child : func.getChildren()) { - if (child instanceof ExprNodeConstantDesc) { - continue; - } else if (child instanceof ExprNodeGenericFuncDesc) { - if (foldConstant((ExprNodeGenericFuncDesc) child) != null) { - continue; - } - } - return null; - } - ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(func); - ObjectInspector output = evaluator.initialize(null); - - Object constant = evaluator.evaluate(null); - Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output); - - return new ExprNodeConstantDesc(java); - } catch (Exception e) { - return null; - } - } - /** * Translates search conditions back to ExprNodeDesc form (as * a left-deep conjunction). 
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 96c8d89..d7aba99 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -22,10 +22,18 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.util.ReflectionUtils; public class ExprNodeDescUtils { @@ -244,4 +252,69 @@ private static ExprNodeDesc backtrack(ExprNodeColumnDesc column, Operator cur } throw new SemanticException("Met multiple parent operators"); } + + public static ExprNodeDesc[] extractComparePair(ExprNodeDesc expr1, ExprNodeDesc expr2) { + expr1 = extractConstant(expr1); + expr2 = extractConstant(expr2); + if (expr1 instanceof ExprNodeColumnDesc && expr2 instanceof ExprNodeConstantDesc) { + return new ExprNodeDesc[] {expr1, expr2}; + } + if (expr1 instanceof ExprNodeConstantDesc && expr2 instanceof ExprNodeColumnDesc) { + return new ExprNodeDesc[] {expr2, expr1, null}; // add null as a marker (inverted order) + } + // todo: constant op constant + return null; + } + + // from IndexPredicateAnalyzer + private static ExprNodeDesc extractConstant(ExprNodeDesc expr) { + if (!(expr instanceof ExprNodeGenericFuncDesc)) { + return expr; + } + 
ExprNodeConstantDesc folded = foldConstant(((ExprNodeGenericFuncDesc) expr)); + return folded == null ? expr : folded; + } + + private static ExprNodeConstantDesc foldConstant(ExprNodeGenericFuncDesc func) { + GenericUDF udf = func.getGenericUDF(); + if (!FunctionRegistry.isDeterministic(udf) || FunctionRegistry.isStateful(udf)) { + return null; + } + try { + // If the UDF depends on any external resources, we can't fold because the + // resources may not be available at compile time. + if (udf instanceof GenericUDFBridge) { + UDF internal = ReflectionUtils.newInstance(((GenericUDFBridge) udf).getUdfClass(), null); + if (internal.getRequiredFiles() != null || internal.getRequiredJars() != null) { + return null; + } + } else { + if (udf.getRequiredFiles() != null || udf.getRequiredJars() != null) { + return null; + } + } + + if (func.getChildren() != null) { + for (ExprNodeDesc child : func.getChildren()) { + if (child instanceof ExprNodeConstantDesc) { + continue; + } else if (child instanceof ExprNodeGenericFuncDesc) { + if (foldConstant((ExprNodeGenericFuncDesc) child) != null) { + continue; + } + } + return null; + } + } + ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(func); + ObjectInspector output = evaluator.initialize(null); + + Object constant = evaluator.evaluate(null); + Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output); + + return new ExprNodeConstantDesc(java); + } catch (Exception e) { + return null; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java index 3d1f55e..cf0268f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java @@ -187,4 +187,15 @@ public abstract Object evaluate(DeferredObject[] arguments) */ public void close() throws IOException { } + + /** + * Some functions are affected by order of arguments
(comparisons, for example) + */ + public GenericUDF flip() { + return this; + } + + public String getUdfName() { + return getClass().getName(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java index 8206edd..1edada3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java @@ -105,6 +105,7 @@ public void setUdfName(String udfName) { this.udfName = udfName; } + @Override public String getUdfName() { return udfName; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualNS.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualNS.java index d0b35a7..a39f660 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualNS.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualNS.java @@ -39,4 +39,5 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } return super.evaluate(arguments); } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java index 4083f5f..edb1bf8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java @@ -52,7 +52,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarGreaterEqualStringColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.Text; /** @@ -139,4 +138,9 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return result; } + @Override + public GenericUDF flip() { + 
return new GenericUDFOPEqualOrLessThan(); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java index 5b98bc3..06d9647 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java @@ -52,7 +52,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarLessEqualStringColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.Text; /** @@ -139,5 +138,10 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return result; } + @Override + public GenericUDF flip() { + return new GenericUDFOPEqualOrGreaterThan(); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java index 2a9f4e2..28bce88 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java @@ -52,7 +52,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarGreaterStringColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.Text; /** @@ -139,5 +138,10 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return result; } + @Override + public GenericUDF flip() { + return new GenericUDFOPLessThan(); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java index 3232ad1..9258b43 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java @@ -23,7 +23,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.Text; /** @@ -110,4 +109,9 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return result; } + @Override + public GenericUDF flip() { + return new GenericUDFOPGreaterThan(); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java index 4c11e5b..1bc95df 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java @@ -130,4 +130,5 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { } return result; } + }