diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 2dc681e..c25a783 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -20,8 +20,6 @@ import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME; -import org.apache.commons.lang3.tuple.ImmutablePair; - import java.io.OutputStream; import java.io.PrintStream; import java.io.Serializable; @@ -32,74 +30,40 @@ import java.util.Arrays; import java.util.Collection; import java.util.Comparator; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Stack; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; -import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.jsonexplain.JsonParser; import org.apache.hadoop.hive.common.jsonexplain.JsonParserFactory; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.Validator.StringSet; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.exec.spark.SparkTask; -import org.apache.hadoop.hive.ql.exec.tez.TezTask; -import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; -import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; -import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExplainWork; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TezWork; -import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; -import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; -import 
org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationFactory; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.AnnotationUtils; @@ -109,6 +73,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.annotations.VisibleForTesting; + /** * ExplainTask implementation. * @@ -705,6 +671,10 @@ private JSONObject outputPlan(Object work, @VisibleForTesting JSONObject outputPlan(Object work, PrintStream out, boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception { + + // Are we running tests? + final boolean inTest = queryState.getConf().getBoolVar(ConfVars.HIVE_IN_TEST); + // Check if work has an explain annotation Annotation note = AnnotationUtils.getAnnotation(work.getClass(), Explain.class); @@ -920,7 +890,11 @@ JSONObject outputPlan(Object work, PrintStream out, Object val = null; try { - val = m.invoke(work); + if (inTest && postProcess(xpl_note)) { + val = m.invoke(work, true); + } else { + val = m.invoke(work); + } } catch (InvocationTargetException ex) { // Ignore the exception, this may be caused by external jars @@ -1037,6 +1011,15 @@ JSONObject outputPlan(Object work, PrintStream out, } /** + * use case: this is only used for testing purposes. For instance, we might + * want to sort the expressions in a filter so we get deterministic comparable + * golden files + */ + private boolean postProcess(Explain exp) { + return exp.postProcess(); + } + + /** * use case: we want to print the object in explain only if it is true * how to do : print it unless the following 3 are all true: * 1. 
displayOnlyOnTrue tag is on diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index 7b16ad7..fbda0ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -74,4 +74,7 @@ public boolean in(Level[] levels) { } }; Vectorization vectorization() default Vectorization.NON_VECTORIZED; + + boolean postProcess() default false; + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java index 328bd86..d2bd3d7 100755 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java @@ -81,6 +81,10 @@ public String getExprString() { return null; } + public String getExprString(boolean sortChildren) { + return getExprString(); + } + public ObjectInspector getWritableObjectInspector() { return TypeInfoUtils .getStandardWritableObjectInspectorFromTypeInfo(typeInfo); @@ -151,4 +155,5 @@ public int hashCode() { return wrapped; } } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index aef46da..8dffd0c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -23,9 +23,14 @@ import java.util.Arrays; import java.util.List; +import org.apache.commons.collections.Bag; +import org.apache.commons.collections.bag.TreeBag; import org.apache.commons.lang.builder.HashCodeBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.google.common.collect.ImmutableSortedMultiset; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -33,6 +38,7 @@ import 
org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; @@ -166,6 +172,23 @@ public String getExprString() { } @Override + public String getExprString(boolean sortChildren) { + if (sortChildren) { + UDFType udfType = genericUDF.getClass().getAnnotation(UDFType.class); + if (udfType.commutative()) { + // Get the sorted children expr strings + String[] childrenExprStrings = new String[chidren.size()]; + for (int i = 0; i < childrenExprStrings.length; i++) { + childrenExprStrings[i] = chidren.get(i).getExprString(); + } + return genericUDF.getDisplayString( + ImmutableSortedMultiset.copyOf(childrenExprStrings).toArray(new String[childrenExprStrings.length])); + } + } + return getExprString(); + } + + @Override public List getCols() { List colList = new ArrayList(); if (chidren != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index 3de310c..4b69380 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -109,16 +109,24 @@ public FilterDesc( this.sampleDescr = sampleDescr; } - @Explain(displayName = "predicate") public String getPredicateString() { return PlanUtils.getExprListString(Arrays.asList(predicate)); } - @Explain(displayName = "predicate", explainLevels = { Level.USER }) public String getUserLevelExplainPredicateString() { return PlanUtils.getExprListString(Arrays.asList(predicate), true); } + @Explain(displayName = "predicate", postProcess = true) + public String getPredicateString(boolean postProcess) { + return 
PlanUtils.getExprListString(Arrays.asList(predicate), false, postProcess); + } + + @Explain(displayName = "predicate", explainLevels = { Level.USER }, postProcess = true) + public String getUserLevelExplainPredicateString(boolean postProcess) { + return PlanUtils.getExprListString(Arrays.asList(predicate), true, postProcess); + } + public org.apache.hadoop.hive.ql.plan.ExprNodeDesc getPredicate() { return predicate; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 1dea6a9..a59115f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -1064,10 +1064,15 @@ public static ReadEntity addInput(Set inputs, ReadEntity newInput, b } public static String getExprListString(Collection exprs) { - return getExprListString(exprs, false); + return getExprListString(exprs, false, false); } public static String getExprListString(Collection exprs, boolean userLevelExplain) { + return getExprListString(exprs, userLevelExplain, false); + } + + public static String getExprListString(Collection exprs, + boolean userLevelExplain, boolean sortExpressions) { StringBuilder sb = new StringBuilder(); boolean first = true; for (ExprNodeDesc expr: exprs) { @@ -1076,15 +1081,19 @@ public static String getExprListString(Collection exprs } else { first = false; } - addExprToStringBuffer(expr, sb, userLevelExplain); + addExprToStringBuffer(expr, sb, userLevelExplain, sortExpressions); } - return sb.length() == 0 ? 
null : sb.toString(); } public static void addExprToStringBuffer(ExprNodeDesc expr, Appendable sb, boolean userLevelExplain) { + addExprToStringBuffer(expr, sb, userLevelExplain, false); + } + + public static void addExprToStringBuffer(ExprNodeDesc expr, Appendable sb, + boolean userLevelExplain, boolean sortExpressions) { try { - sb.append(expr.getExprString()); + sb.append(expr.getExprString(sortExpressions)); if (!userLevelExplain) { sb.append(" (type: "); sb.append(expr.getTypeString()); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java index fe9370f..ac3ec58 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java @@ -76,4 +76,12 @@ * @return true if the function implies order */ boolean impliesOrder() default false; + + /** + * Whether the result of this operation remains unchanged when its + * children are reordered. + * + * @return true if the commutative law applies to this function + */ + boolean commutative() default false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java index fa0cda8..06d7214 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPAnd.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterScalarAndColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; @@ -39,6 +40,7 @@ @VectorizedExpressions({ColAndCol.class, 
FilterExprAndExpr.class, FilterColAndScalar.class, FilterScalarAndColumn.class}) @NDV(maxNdv = 2) +@UDFType(deterministic = true, commutative = true) public class GenericUDFOPAnd extends GenericUDF { private final BooleanWritable result = new BooleanWritable(); private transient BooleanObjectInspector boi[]; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java index af38c97..c189913 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPOr.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterScalarOrColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; @@ -39,6 +40,7 @@ @VectorizedExpressions({ColOrCol.class, FilterExprOrExpr.class, FilterColOrScalar.class, FilterScalarOrColumn.class}) @NDV(maxNdv = 2) +@UDFType(deterministic = true, commutative = true) public class GenericUDFOPOr extends GenericUDF { private final BooleanWritable result = new BooleanWritable(); private transient BooleanObjectInspector[] boi;