diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index d823f03..cd68f4e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -62,7 +62,9 @@
 import org.apache.hadoop.hive.ql.udf.SettableUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -814,10 +816,12 @@ static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, TypeInfo type
         ((SettableUDF)genericUDF).setTypeInfo(typeInfo);
       }
     }
-
+    List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(children.length);
+    childrenList.addAll(Arrays.asList(children));
-    return ExprNodeGenericFuncDesc.newInstance(genericUDF, childrenList);
+    return ExprNodeGenericFuncDesc.newInstance(genericUDF,
+        childrenList);
   }
 
   public static ExprNodeDesc getFuncExprNodeDesc(String udfName,
@@ -1048,8 +1052,36 @@ protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
           }
         }
       }
-
-      desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, children);
+      if (genericUDF instanceof GenericUDFOPOr) {
+        // flatten OR
+        List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(
+            children.size());
+        for (ExprNodeDesc child : children) {
+          if (FunctionRegistry.isOpOr(child)) {
+            childrenList.addAll(child.getChildren());
+          } else {
+            childrenList.add(child);
+          }
+        }
+        desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText,
+            childrenList);
+      } else if (genericUDF instanceof GenericUDFOPAnd) {
+        // flatten AND
+        List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(
+            children.size());
+        for (ExprNodeDesc child : children) {
+          if (FunctionRegistry.isOpAnd(child)) {
+            childrenList.addAll(child.getChildren());
+          } else {
+            childrenList.add(child);
+          }
+        }
+        desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText,
+            childrenList);
+      } else {
+        desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText,
+            children);
+      }
     }
     // UDFOPPositive is a no-op.
     // However, we still create it, and then remove it here, to make sure we
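Note: the flattening in the last hunk above is only one level deep per call, but TypeCheckProcFactory builds expressions bottom-up, so applying the splice at every newly created AND/OR node collapses an arbitrarily deep chain into a single n-ary node. Below is a minimal standalone sketch of that idea, under the assumption that a toy Expr class can stand in for ExprNodeDesc and for the FunctionRegistry.isOpOr/isOpAnd checks; the names in it are illustrative only, not Hive API.

import java.util.ArrayList;
import java.util.List;

// Toy stand-in for ExprNodeDesc: an operator (or leaf label) plus children.
// Illustration of the one-level splice in the hunk above, not Hive's classes.
final class Expr {
  final String op;
  final List<Expr> children;

  Expr(String op, List<Expr> children) {
    this.op = op;
    this.children = children;
  }

  static Expr leaf(String label) {
    return new Expr(label, new ArrayList<Expr>());
  }

  // Mirrors the TypeCheckProcFactory change: while creating an AND/OR node,
  // splice in the children of any child that uses the same operator.
  static Expr newFlattened(String op, Expr... children) {
    List<Expr> flat = new ArrayList<Expr>(children.length);
    for (Expr child : children) {
      if (child.op.equals(op)) {
        flat.addAll(child.children); // one level suffices when applied bottom-up
      } else {
        flat.add(child);
      }
    }
    return new Expr(op, flat);
  }

  @Override
  public String toString() {
    if (children.isEmpty()) {
      return op;
    }
    StringBuilder sb = new StringBuilder("(");
    for (int i = 0; i < children.size(); i++) {
      if (i > 0) {
        sb.append(" ").append(op).append(" ");
      }
      sb.append(children.get(i));
    }
    return sb.append(")").toString();
  }

  public static void main(String[] args) {
    Expr ab = newFlattened("or", leaf("a"), leaf("b")); // (a or b)
    Expr abc = newFlattened("or", ab, leaf("c"));       // nested OR is spliced in
    System.out.println(abc);                  // prints (a or b or c)
    System.out.println(abc.children.size());  // prints 3, not 2
  }
}

Running main prints (a or b or c) with a child count of 3: the OR built first is absorbed into its parent instead of staying a binary subtree, which is what the n-ary GenericUDFOPAnd/GenericUDFOPOr changes below rely on.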
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java
index 47abb20..db7fbac 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java
@@ -35,46 +35,43 @@
 /**
  * GenericUDF Class for computing and.
  */
-@Description(name = "and", value = "a _FUNC_ b - Logical and")
+@Description(name = "and", value = "a1 _FUNC_ a2 _FUNC_ ... _FUNC_ an - Logical and")
 @VectorizedExpressions({ColAndCol.class, FilterExprAndExpr.class, FilterColAndScalar.class,
   FilterScalarAndColumn.class})
 public class GenericUDFOPAnd extends GenericUDF {
   private final BooleanWritable result = new BooleanWritable();
-  private transient BooleanObjectInspector boi0,boi1;
+  private transient BooleanObjectInspector boi[];
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-    if (arguments.length != 2) {
+    if (arguments.length < 2) {
       throw new UDFArgumentLengthException(
-          "The operator 'AND' only accepts 2 argument.");
+          "The operator 'AND' accepts at least 2 arguments.");
+    }
+    boi = new BooleanObjectInspector[arguments.length];
+    for (int i = 0; i < arguments.length; i++) {
+      boi[i] = (BooleanObjectInspector) arguments[i];
     }
-    boi0 = (BooleanObjectInspector) arguments[0];
-    boi1 = (BooleanObjectInspector) arguments[1];
     return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
   }
 
   @Override
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
-    boolean bool_a0 = false, bool_a1 = false;
-    Object a0 = arguments[0].get();
-    if (a0 != null) {
-      bool_a0 = boi0.get(a0);
-      if (bool_a0 == false) {
-        result.set(false);
-        return result;
-      }
-    }
-
-    Object a1 = arguments[1].get();
-    if (a1 != null) {
-      bool_a1 = boi1.get(a1);
-      if (bool_a1 == false) {
-        result.set(false);
-        return result;
+    boolean notNull = true;
+    for (int i = 0; i < arguments.length; i++) {
+      Object a = arguments[i].get();
+      if (a != null) {
+        boolean bool_a = boi[i].get(a);
+        if (bool_a == false) {
+          result.set(false);
+          return result;
+        }
+      } else {
+        notNull = false;
       }
     }
-    if ((a0 != null && bool_a0 == true) && (a1 != null && bool_a1 == true)) {
+    if (notNull) {
       result.set(true);
       return result;
     }
@@ -84,8 +81,20 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
 
   @Override
   public String getDisplayString(String[] children) {
-    assert (children.length == 2);
-    return "(" + children[0] + " and " + children[1] + ")";
+    assert (children.length >= 2);
+    StringBuilder sb = new StringBuilder();
+    sb.append("(");
+    boolean first = true;
+    for (String and : children) {
+      if (!first) {
+        sb.append(" and ");
+      } else {
+        first = false;
+      }
+      sb.append(and);
+    }
+    sb.append(")");
+    return sb.toString();
   }
 
 }
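Note: the rewritten evaluate() keeps SQL three-valued logic while still short-circuiting. Any FALSE operand returns immediately, and a NULL operand only downgrades an otherwise TRUE result to NULL. A self-contained sketch of that rule follows, using boxed Boolean values with null standing in for SQL NULL instead of the ObjectInspector/DeferredObject machinery; the class and method names are made up for illustration.

// Sketch of the n-ary AND decision rule in the new evaluate() above, with
// Boolean null standing in for a SQL NULL operand. Illustration only; this is
// not the GenericUDF API.
public final class NaryAndSketch {

  static Boolean and(Boolean... args) {
    boolean sawNull = false;
    for (Boolean a : args) {
      if (a == null) {
        sawNull = true;        // remember the NULL, keep scanning for a FALSE
      } else if (!a) {
        return Boolean.FALSE;  // any FALSE short-circuits the whole AND
      }
    }
    return sawNull ? null : Boolean.TRUE;  // all TRUE unless a NULL was seen
  }

  public static void main(String[] args) {
    System.out.println(and(true, true, true));  // true
    System.out.println(and(true, null, false)); // false (FALSE beats NULL)
    System.out.println(and(true, null, true));  // null  (unknown)
  }
}

The OR change in the next file is the mirror image: any TRUE operand returns immediately, and a NULL only downgrades an otherwise FALSE result to NULL.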
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java
index cd656a0..4160610 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java
@@ -35,47 +35,44 @@
 /**
  * GenericUDF Class for computing or.
  */
-@Description(name = "or", value = "a _FUNC_ b - Logical or")
+@Description(name = "or", value = "a1 _FUNC_ a2 _FUNC_ ... _FUNC_ an - Logical or")
 @VectorizedExpressions({ColOrCol.class, FilterExprOrExpr.class, FilterColOrScalar.class,
   FilterScalarOrColumn.class})
 public class GenericUDFOPOr extends GenericUDF {
   private final BooleanWritable result = new BooleanWritable();
-  private transient BooleanObjectInspector boi0,boi1;
+  private transient BooleanObjectInspector[] boi;
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-    if (arguments.length != 2) {
+    if (arguments.length < 2) {
       throw new UDFArgumentLengthException(
-          "The operator 'OR' only accepts 2 argument.");
+          "The operator 'OR' accepts at least 2 arguments.");
+    }
+    boi = new BooleanObjectInspector[arguments.length];
+    for (int i = 0; i < arguments.length; i++) {
+      boi[i] = (BooleanObjectInspector) arguments[i];
     }
-    boi0 = (BooleanObjectInspector) arguments[0];
-    boi1 = (BooleanObjectInspector) arguments[1];
     return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
   }
 
   @Override
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
-    boolean bool_a0 = false, bool_a1 = false;
-    Object a0 = arguments[0].get();
-    if (a0 != null) {
-      bool_a0 = boi0.get(a0);
-      if (bool_a0 == true) {
-        result.set(true);
-        return result;
-      }
-    }
-
-    Object a1 = arguments[1].get();
-    if (a1 != null) {
-      bool_a1 = boi1.get(a1);
-      if (bool_a1 == true) {
-        result.set(true);
-        return result;
+    boolean notNull = true;
+    for (int i = 0; i < arguments.length; i++) {
+      Object a = arguments[i].get();
+      if (a != null) {
+        boolean bool_a = boi[i].get(a);
+        if (bool_a == true) {
+          result.set(true);
+          return result;
+        }
+      } else {
+        notNull = false;
       }
     }
-    if ((a0 != null && bool_a0 == false) && (a1 != null && bool_a1 == false)) {
+    if (notNull) {
      result.set(false);
      return result;
    }
@@ -85,8 +82,20 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
 
   @Override
   public String getDisplayString(String[] children) {
-    assert (children.length == 2);
-    return "(" + children[0] + " or " + children[1] + ")";
+    assert (children.length >= 2);
+    StringBuilder sb = new StringBuilder();
+    sb.append("(");
+    boolean first = true;
+    for (String or : children) {
+      if (!first) {
+        sb.append(" or ");
+      } else {
+        first = false;
+      }
+      sb.append(or);
+    }
+    sb.append(")");
+    return sb.toString();
   }
 
 }
diff --git ql/src/test/queries/clientpositive/flatten_and_or.q ql/src/test/queries/clientpositive/flatten_and_or.q
new file mode 100644
index 0000000..6d65225
--- /dev/null
+++ ql/src/test/queries/clientpositive/flatten_and_or.q
@@ -0,0 +1,17 @@
+explain
+SELECT key
+FROM src
+WHERE
+  ((key = '0'
+  AND value = '8') OR (key = '1'
+  AND value = '5') OR (key = '2'
+  AND value = '6') OR (key = '3'
+  AND value = '8') OR (key = '4'
+  AND value = '1') OR (key = '5'
+  AND value = '6') OR (key = '6'
+  AND value = '1') OR (key = '7'
+  AND value = '1') OR (key = '8'
+  AND value = '1') OR (key = '9'
+  AND value = '1') OR (key = '10'
+  AND value = '3'))
+;
\ No newline at end of file
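Note: the line to watch in the golden file below is the Filter Operator predicate. With the flattening in place it prints as one flat chain of or-ed conjuncts rather than a deeply nested binary tree. That text comes from the new getDisplayString() loops; the small sketch below reproduces the join they perform, with a hypothetical class and method name used purely for illustration.

// Sketch of the join performed by the new getDisplayString() in
// GenericUDFOPAnd/GenericUDFOPOr: n children become one flat
// "(c1 op c2 op ... op cn)" string, which is the shape of the predicate
// line in the golden file below. Names are illustrative only.
public final class DisplayStringSketch {

  static String display(String op, String... children) {
    StringBuilder sb = new StringBuilder("(");
    boolean first = true;
    for (String child : children) {
      if (!first) {
        sb.append(' ').append(op).append(' ');
      } else {
        first = false;
      }
      sb.append(child);
    }
    return sb.append(')').toString();
  }

  public static void main(String[] args) {
    System.out.println(display("or",
        "((key = '0') and (value = '8'))",
        "((key = '1') and (value = '5'))",
        "((key = '2') and (value = '6'))"));
    // prints a single flat disjunction:
    // (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')))
  }
}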
diff --git ql/src/test/results/clientpositive/flatten_and_or.q.out ql/src/test/results/clientpositive/flatten_and_or.q.out
new file mode 100644
index 0000000..9c51ff3
--- /dev/null
+++ ql/src/test/results/clientpositive/flatten_and_or.q.out
@@ -0,0 +1,66 @@
+PREHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+  ((key = '0'
+  AND value = '8') OR (key = '1'
+  AND value = '5') OR (key = '2'
+  AND value = '6') OR (key = '3'
+  AND value = '8') OR (key = '4'
+  AND value = '1') OR (key = '5'
+  AND value = '6') OR (key = '6'
+  AND value = '1') OR (key = '7'
+  AND value = '1') OR (key = '8'
+  AND value = '1') OR (key = '9'
+  AND value = '1') OR (key = '10'
+  AND value = '3'))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT key
+FROM src
+WHERE
+  ((key = '0'
+  AND value = '8') OR (key = '1'
+  AND value = '5') OR (key = '2'
+  AND value = '6') OR (key = '3'
+  AND value = '8') OR (key = '4'
+  AND value = '1') OR (key = '5'
+  AND value = '6') OR (key = '6'
+  AND value = '1') OR (key = '7'
+  AND value = '1') OR (key = '8'
+  AND value = '1') OR (key = '9'
+  AND value = '1') OR (key = '10'
+  AND value = '3'))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+