diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 47db0c0..0751e91 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2837,6 +2837,9 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
     TEZ_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE("hive.tez.dynamic.partition.pruning.max.data.size",
         100*1024*1024L,
         "Maximum total data size of events in dynamic pruning."),
+    TEZ_DYNAMIC_SEMIJOIN_REDUCTION("hive.tez.dynamic.semijoin.reduction", true,
+        "When dynamic semijoin is enabled, shuffle joins will perform a leaky semijoin before shuffle. This " +
+        "requires hive.tez.dynamic.partition.pruning to be enabled."),
     TEZ_SMB_NUMBER_WAVES(
         "hive.tez.smb.number.waves", (float) 0.5,
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1cebc70..afec5e7 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -154,6 +154,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   delete_whole_partition.q,\
   disable_merge_for_bucketing.q,\
   dynamic_partition_pruning.q,\
+  dynamic_semijoin_reduction.q,\
   dynpart_sort_opt_vectorization.q,\
   dynpart_sort_optimization.q,\
   dynpart_sort_optimization2.q,\
@@ -480,6 +481,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   correlationoptimizer6.q,\
   disable_merge_for_bucketing.q,\
   dynamic_partition_pruning.q,\
+  dynamic_semijoin_reduction.q,\
   dynpart_sort_opt_vectorization.q,\
   dynpart_sort_optimization.q,\
   dynpart_sort_optimization_acid.q,\
diff --git a/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index cdd62ac..30b42ee 100644
--- a/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -76,7 +76,7 @@ public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
                                                  Object literal,
                                                  List<Object> literalList) {
     return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
-        literal, literalList);
+        literal, literalList, null);
   }

   // can add .verboseLogging() to cause Mockito to log invocations
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
index 69ba4a2..669e23e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
@@ -70,7 +70,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     if (conf.getGenJoinKeys()) {
       int tagLen = conf.getTagLength();
       joinKeys = new List[tagLen];
-      JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), NOTSKIPBIGTABLE);
+      JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), NOTSKIPBIGTABLE, hconf);
       joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys,
           inputObjInspectors,NOTSKIPBIGTABLE, tagLen);
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
index 940f2dd..b25eb39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
@@ -251,11 +251,11 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     noOuterJoin = conf.isNoOuterJoin();

     totalSz = JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(),
-        order,NOTSKIPBIGTABLE);
+        order,NOTSKIPBIGTABLE, hconf);

     //process join filters
     joinFilters = new List[tagLen];
-    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(),order,NOTSKIPBIGTABLE);
+    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(),order,NOTSKIPBIGTABLE, hconf);

     joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DynamicValueRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DynamicValueRegistry.java
new file mode 100644
index 0000000..63336bd
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DynamicValueRegistry.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+public interface DynamicValueRegistry {
+
+  // Abstract class to hold info required for the implementation
+  public static abstract class RegistryConf {
+  }
+
+  Object getValue(String key) throws Exception;
+
+  void init(RegistryConf conf) throws Exception;
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeColumnEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeColumnEvaluator.java
index 24c8281..b0384df 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeColumnEvaluator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeColumnEvaluator.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.exec;

+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -38,8 +39,8 @@
   private transient StructField[] fields;
   private transient boolean[] unionField;

-  public ExprNodeColumnEvaluator(ExprNodeColumnDesc expr) {
-    super(expr);
+  public ExprNodeColumnEvaluator(ExprNodeColumnDesc expr, Configuration conf) {
+    super(expr, conf);
   }

   @Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantDefaultEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantDefaultEvaluator.java
index 89a75eb..f53c3e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantDefaultEvaluator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantDefaultEvaluator.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.exec;

+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDefaultDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -33,7 +34,11 @@
transient ObjectInspector writableObjectInspector; public ExprNodeConstantDefaultEvaluator(ExprNodeConstantDefaultDesc expr) { - super(expr); + this(expr, null); + } + + public ExprNodeConstantDefaultEvaluator(ExprNodeConstantDefaultDesc expr, Configuration conf) { + super(expr, conf); writableObjectInspector = expr.getWritableObjectInspector(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantEvaluator.java index 4fe72a0..ca39e21 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeConstantEvaluator.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; @@ -32,7 +33,11 @@ transient ConstantObjectInspector writableObjectInspector; public ExprNodeConstantEvaluator(ExprNodeConstantDesc expr) { - super(expr); + this(expr, null); + } + + public ExprNodeConstantEvaluator(ExprNodeConstantDesc expr, Configuration conf) { + super(expr, conf); writableObjectInspector = expr.getWritableObjectInspector(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeDynamicValueEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeDynamicValueEvaluator.java new file mode 100644 index 0000000..6c68215 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeDynamicValueEvaluator.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.DynamicValue; +import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; + +/** + * ExprNodeDynamicEvaluator. 
+ * + */ +public class ExprNodeDynamicValueEvaluator extends ExprNodeEvaluator { + + transient ObjectInspector oi; + + public ExprNodeDynamicValueEvaluator(ExprNodeDynamicValueDesc expr, Configuration conf) { + super(expr, conf); + oi = ObjectInspectorUtils.getStandardObjectInspector(expr.getWritableObjectInspector(), ObjectInspectorCopyOption.WRITABLE); + } + + @Override + public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException { + return oi; + } + + @Override + protected Object _evaluate(Object row, int version) throws HiveException { + DynamicValue dynamicValue = expr.getDynamicValue(); + dynamicValue.setConf(conf); + return dynamicValue.getWritableValue(); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java index b8d6ab7..375d65f 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluator.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -30,9 +31,11 @@ protected final T expr; protected ObjectInspector outputOI; + protected Configuration conf; - public ExprNodeEvaluator(T expr) { + public ExprNodeEvaluator(T expr, Configuration conf) { this.expr = expr; + this.conf = conf; } /** @@ -109,4 +112,12 @@ public boolean isStateful() { public String toString() { return "ExprNodeEvaluator[" + expr + "]"; } + + public Configuration getConf() { + return conf; + } + + public void setConf(Configuration conf) { + this.conf = conf; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java index 0d03d8f..34aec55 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorFactory.java @@ -21,11 +21,13 @@ import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDefaultDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; @@ -39,9 +41,13 @@ private ExprNodeEvaluatorFactory() { } public static ExprNodeEvaluator get(ExprNodeDesc desc) throws HiveException { + return get(desc, null); + } + + public static ExprNodeEvaluator get(ExprNodeDesc desc, Configuration conf) throws HiveException { // Constant node if (desc instanceof ExprNodeConstantDesc) { - return new ExprNodeConstantEvaluator((ExprNodeConstantDesc) desc); + return new ExprNodeConstantEvaluator((ExprNodeConstantDesc) desc, conf); } // Special 'default' constant node @@ -51,15 +57,19 @@ public static ExprNodeEvaluator get(ExprNodeDesc desc) throws HiveException { // Column-reference node, e.g. 
a column in the input row if (desc instanceof ExprNodeColumnDesc) { - return new ExprNodeColumnEvaluator((ExprNodeColumnDesc) desc); + return new ExprNodeColumnEvaluator((ExprNodeColumnDesc) desc, conf); } // Generic Function node, e.g. CASE, an operator or a UDF node if (desc instanceof ExprNodeGenericFuncDesc) { - return new ExprNodeGenericFuncEvaluator((ExprNodeGenericFuncDesc) desc); + return new ExprNodeGenericFuncEvaluator((ExprNodeGenericFuncDesc) desc, conf); } // Field node, e.g. get a.myfield1 from a if (desc instanceof ExprNodeFieldDesc) { - return new ExprNodeFieldEvaluator((ExprNodeFieldDesc) desc); + return new ExprNodeFieldEvaluator((ExprNodeFieldDesc) desc, conf); + } + // Dynamic value which will be determined during query runtime + if (desc instanceof ExprNodeDynamicValueDesc) { + return new ExprNodeDynamicValueEvaluator((ExprNodeDynamicValueDesc) desc, conf); } throw new RuntimeException( "Cannot find ExprNodeEvaluator for the exprNodeDesc = " + desc); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorHead.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorHead.java index 42685fb..991bc13 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorHead.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorHead.java @@ -30,7 +30,7 @@ private final ExprNodeEvaluator referencing; public ExprNodeEvaluatorHead(ExprNodeEvaluator referencing) { - super(referencing.getExpr()); + super(referencing.getExpr(), referencing.getConf()); this.referencing = referencing; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorRef.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorRef.java index 0a6b66a..625d486 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorRef.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeEvaluatorRef.java @@ -30,7 +30,7 @@ private final ExprNodeEvaluator referencing; public ExprNodeEvaluatorRef(ExprNodeEvaluator referencing) { - super(referencing.getExpr()); + super(referencing.getExpr(), referencing.getConf()); this.referencing = referencing; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFieldEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFieldEvaluator.java index ff32626..1241343 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFieldEvaluator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeFieldEvaluator.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; @@ -43,9 +44,9 @@ transient ObjectInspector structFieldObjectInspector; transient ObjectInspector resultObjectInspector; - public ExprNodeFieldEvaluator(ExprNodeFieldDesc desc) throws HiveException { - super(desc); - leftEvaluator = ExprNodeEvaluatorFactory.get(desc.getDesc()); + public ExprNodeFieldEvaluator(ExprNodeFieldDesc desc, Configuration conf) throws HiveException { + super(desc, conf); + leftEvaluator = ExprNodeEvaluatorFactory.get(desc.getDesc(), conf); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java index 221abd9..8b9baa6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java 
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java @@ -20,6 +20,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; @@ -91,13 +92,13 @@ public Object get() throws HiveException { } } - public ExprNodeGenericFuncEvaluator(ExprNodeGenericFuncDesc expr) throws HiveException { - super(expr); + public ExprNodeGenericFuncEvaluator(ExprNodeGenericFuncDesc expr, Configuration conf) throws HiveException { + super(expr, conf); children = new ExprNodeEvaluator[expr.getChildren().size()]; isEager = false; for (int i = 0; i < children.length; i++) { ExprNodeDesc child = expr.getChildren().get(i); - ExprNodeEvaluator nodeEvaluator = ExprNodeEvaluatorFactory.get(child); + ExprNodeEvaluator nodeEvaluator = ExprNodeEvaluatorFactory.get(child, conf); children[i] = nodeEvaluator; // If we have eager evaluators anywhere below us, then we are eager too. if (nodeEvaluator instanceof ExprNodeGenericFuncEvaluator) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java index bd0d28c..df30ab2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java @@ -60,7 +60,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { try { heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT); - conditionEvaluator = ExprNodeEvaluatorFactory.get(conf.getPredicate()); + conditionEvaluator = ExprNodeEvaluatorFactory.get(conf.getPredicate(), hconf); if (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEEXPREVALUATIONCACHE)) { conditionEvaluator = ExprNodeEvaluatorFactory.toCachedEval(conditionEvaluator); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index 46f0ecd..5e1880e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -212,7 +212,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { keyObjectInspectors = new ObjectInspector[numKeys]; currentKeyObjectInspectors = new ObjectInspector[numKeys]; for (int i = 0; i < numKeys; i++) { - keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i)); + keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf); keyObjectInspectors[i] = keyFields[i].initialize(rowInspector); currentKeyObjectInspectors[i] = ObjectInspectorUtils .getStandardObjectInspector(keyObjectInspectors[i], @@ -258,7 +258,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { new ExprNodeColumnDesc(TypeInfoUtils.getTypeInfoFromObjectInspector( sf.getFieldObjectInspector()), keyField.getFieldName() + "." 
+ sf.getFieldName(), null, - false)); + false), hconf); unionExprEval.initialize(rowInspector); } } @@ -283,7 +283,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { aggregationParameterObjects[i] = new Object[parameters.size()]; for (int j = 0; j < parameters.size(); j++) { aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory - .get(parameters.get(j)); + .get(parameters.get(j), hconf); aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j] .initialize(rowInspector); if (unionExprEval != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java index ac5331e..3a366f6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java @@ -143,19 +143,19 @@ protected void initializeOp(Configuration hconf) throws HiveException { // process join keys joinKeys = new List[tagLen]; - JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias); + JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf); joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen); // process join values joinValues = new List[tagLen]; - JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias); + JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf); joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen); // process join filters joinFilters = new List[tagLen]; - JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias); + JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf); joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java index 9718c48..07a3dc6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java @@ -121,14 +121,14 @@ } public static int populateJoinKeyValue(List[] outMap, - Map> inputMap, int posBigTableAlias) throws HiveException { - return populateJoinKeyValue(outMap, inputMap, null, posBigTableAlias); + Map> inputMap, int posBigTableAlias, Configuration conf) throws HiveException { + return populateJoinKeyValue(outMap, inputMap, null, posBigTableAlias, conf); } public static int populateJoinKeyValue(List[] outMap, Map> inputMap, Byte[] order, - int posBigTableAlias) throws HiveException { + int posBigTableAlias, Configuration conf) throws HiveException { int total = 0; for (Entry> e : inputMap.entrySet()) { if (e.getValue() == null) { @@ -140,7 +140,7 @@ public static int populateJoinKeyValue(List[] outMap, if (key == (byte) posBigTableAlias) { valueFields.add(null); } else { - valueFields.add(ExprNodeEvaluatorFactory.get(expr)); + valueFields.add(ExprNodeEvaluatorFactory.get(expr, conf)); } } outMap[key] = valueFields; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java index 440e0a1..b931c95 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
@@ -44,6 +44,16 @@
   public <T> T retrieve(String key, Callable<T> fn) throws HiveException;

   /**
+   * Retrieve object from cache.
+   *
+   * @param <T>
+   * @param key
+   *          the key of the cached object to return
+   * @return the last cached object with the key, null if none.
+   */
+  public <T> T retrieve(String key) throws HiveException;
+
+  /**
    * Retrieve object from cache asynchronously.
    *
    * @param <T>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheWrapper.java
index 9768efa..71bcd98 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheWrapper.java
@@ -36,6 +36,11 @@ public void release(String key) {
   }

   @Override
+  public <T> T retrieve(String key) throws HiveException {
+    return globalCache.retrieve(makeKey(key));
+  }
+
+  @Override
   public <T> T retrieve(String key, Callable<T> fn) throws HiveException {
     return globalCache.retrieve(makeKey(key), fn);
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
index 9049ddd..a30c771 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
@@ -63,7 +63,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     eval = new ExprNodeEvaluator[colList.size()];
     for (int i = 0; i < colList.size(); i++) {
       assert (colList.get(i) != null);
-      eval[i] = ExprNodeEvaluatorFactory.get(colList.get(i));
+      eval[i] = ExprNodeEvaluatorFactory.get(colList.get(i), hconf);
     }
     if (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEEXPREVALUATIONCACHE)) {
       eval = ExprNodeEvaluatorFactory.toCachedEvals(eval);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
index 008f8a4..cfe1750 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
@@ -47,6 +47,11 @@ public void release(String key) {
   }

   @Override
+  public <T> T retrieve(String key) throws HiveException {
+    return retrieve(key, null);
+  }
+
+  @Override
   public <T> T retrieve(String key, Callable<T> fn) throws HiveException {
     try {
       if (isDebugEnabled) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java
new file mode 100644
index 0000000..7bbedf6
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicValueRegistryTez.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.tez; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.DynamicValueRegistry; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.tez.runtime.api.Input; +import org.apache.tez.runtime.api.LogicalInput; +import org.apache.tez.runtime.api.ProcessorContext; +import org.apache.tez.runtime.library.api.KeyValueReader; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DynamicValueRegistryTez implements DynamicValueRegistry { + private static final Logger LOG = LoggerFactory.getLogger(DynamicValueRegistryTez.class); + + public static class RegistryConfTez extends RegistryConf { + public Configuration conf; + public BaseWork baseWork; + public ProcessorContext processorContext; + public Map inputs; + + public RegistryConfTez(Configuration conf, BaseWork baseWork, + ProcessorContext processorContext, Map inputs) { + super(); + this.conf = conf; + this.baseWork = baseWork; + this.processorContext = processorContext; + this.inputs = inputs; + } + } + + protected Map values = Collections.synchronizedMap(new HashMap()); + + public DynamicValueRegistryTez() { + } + + @Override + public Object getValue(String key) { + if (!values.containsKey(key)) { + throw new IllegalStateException("Value does not exist in registry: " + key); + } + return values.get(key); + } + + protected void setValue(String key, Object value) { + values.put(key, value); + } + + @Override + public void init(RegistryConf conf) throws Exception { + RegistryConfTez rct = (RegistryConfTez) conf; + + for (String inputSourceName : rct.baseWork.getInputSourceToRuntimeValuesInfo().keySet()) { + LOG.info("Runtime value source: " + inputSourceName); + + LogicalInput runtimeValueInput = rct.inputs.get(inputSourceName); + RuntimeValuesInfo runtimeValuesInfo = rct.baseWork.getInputSourceToRuntimeValuesInfo().get(inputSourceName); + + // Setup deserializer/obj inspectors for the incoming data source + Deserializer deserializer = ReflectionUtils.newInstance(runtimeValuesInfo.getTableDesc().getDeserializerClass(), null); + deserializer.initialize(rct.conf, runtimeValuesInfo.getTableDesc().getProperties()); + ObjectInspector inspector = deserializer.getObjectInspector(); + + // Set up col expressions for the dynamic values using this input + List colExprEvaluators = new ArrayList(); + for (ExprNodeDesc expr : runtimeValuesInfo.getColExprs()) { + ExprNodeEvaluator exprEval = ExprNodeEvaluatorFactory.get(expr, null); + exprEval.initialize(inspector); + colExprEvaluators.add(exprEval); + } + + runtimeValueInput.start(); + List inputList = new ArrayList(); + 
inputList.add(runtimeValueInput); + rct.processorContext.waitForAllInputsReady(inputList); + + KeyValueReader kvReader = (KeyValueReader) runtimeValueInput.getReader(); + long rowCount = 0; + while (kvReader.next()) { + Object row = deserializer.deserialize((Writable) kvReader.getCurrentValue()); + rowCount++; + for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) { + // Read each expression and save it to the value registry + ExprNodeEvaluator eval = colExprEvaluators.get(colIdx); + Object val = eval.evaluate(row); + setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), val); + } + } + // For now, expecting a single row (min/max, aggregated bloom filter) + if (rowCount != 1) { + throw new IllegalStateException("Expected 1 row from " + inputSourceName + ", got " + rowCount); + } + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java index 0141230..1ce8ee9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java @@ -60,6 +60,24 @@ public void release(String key) { @SuppressWarnings("unchecked") @Override + public T retrieve(String key) throws HiveException { + + T value = null; + + lock.lock(); + try { + value = (T) registry.getIfPresent(key); + if (value != null && isLogDebugEnabled) { + LOG.debug("Found " + key + " in cache"); + } + return value; + } finally { + lock.unlock(); + } + } + + @SuppressWarnings("unchecked") + @Override public T retrieve(String key, Callable fn) throws HiveException { T value = null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java index 955fa80..790c9d8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java @@ -51,11 +51,13 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; +import org.apache.hadoop.hive.ql.exec.tez.DynamicValueRegistryTez.RegistryConfTez; import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector; import org.apache.hadoop.hive.ql.exec.tez.tools.KeyValueInputMerger; import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.DynamicValue; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.serde2.Deserializer; @@ -88,8 +90,8 @@ private final ExecMapperContext execContext; private MapWork mapWork; List mergeWorkList; - List cacheKeys; - ObjectCache cache; + List cacheKeys, dynamicValueCacheKeys; + ObjectCache cache, dynamicValueCache; private int nRows; public MapRecordProcessor(final JobConf jconf, final ProcessorContext context) throws Exception { @@ -99,9 +101,11 @@ public MapRecordProcessor(final JobConf jconf, final ProcessorContext context) t setLlapOfFragmentId(context); } cache = ObjectCacheFactory.getCache(jconf, queryId, true); + dynamicValueCache = ObjectCacheFactory.getCache(jconf, queryId, false); execContext = new ExecMapperContext(jconf); execContext.setJc(jconf); cacheKeys = new ArrayList(); + dynamicValueCacheKeys = new ArrayList(); nRows = 0; 
} @@ -295,6 +299,21 @@ public Object call() { mapOp.initializeLocalWork(jconf); + // Setup values registry + checkAbortCondition(); + String valueRegistryKey = DynamicValue.DYNAMIC_VALUE_REGISTRY_CACHE_KEY; + // On LLAP dynamic value registry might already be cached. + final DynamicValueRegistryTez registryTez = dynamicValueCache.retrieve(valueRegistryKey, + new Callable() { + @Override + public DynamicValueRegistryTez call() { + return new DynamicValueRegistryTez(); + } + }); + dynamicValueCacheKeys.add(valueRegistryKey); + RegistryConfTez registryConf = new RegistryConfTez(jconf, mapWork, processorContext, inputs); + registryTez.init(registryConf); + checkAbortCondition(); initializeMapRecordSources(); mapOp.initializeMapOperator(jconf); @@ -435,6 +454,12 @@ void close(){ } } + if (dynamicValueCache != null && dynamicValueCacheKeys != null) { + for (String k: dynamicValueCacheKeys) { + dynamicValueCache.release(k); + } + } + // detecting failed executions by exceptions thrown by the operator tree try { if (mapOp == null || mapWork == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java index 06dca00..72dcdd3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java @@ -65,6 +65,22 @@ public void release(String key) { LOG.info("Releasing key: " + key); } + + @SuppressWarnings("unchecked") + @Override + public T retrieve(String key) throws HiveException { + T value = null; + try { + value = (T) registry.get(key); + if ( value != null) { + LOG.info("Found " + key + " in cache with value: " + value); + } + } catch (Exception e) { + throw new HiveException(e); + } + return value; + } + @SuppressWarnings("unchecked") @Override public T retrieve(String key, Callable fn) throws HiveException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index d80f201..2d06545 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -40,9 +40,11 @@ import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats; +import org.apache.hadoop.hive.ql.exec.tez.DynamicValueRegistryTez.RegistryConfTez; import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.DynamicValue; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -64,14 +66,14 @@ private static final String REDUCE_PLAN_KEY = "__REDUCE_PLAN__"; - private ObjectCache cache; + private ObjectCache cache, dynamicValueCache; public static final Logger l4j = LoggerFactory.getLogger(ReduceRecordProcessor.class); private ReduceWork reduceWork; List mergeWorkList = null; - List cacheKeys; + List cacheKeys, dynamicValueCacheKeys; private final Map connectOps = new TreeMap(); @@ -91,9 +93,11 @@ public ReduceRecordProcessor(final JobConf jconf, final ProcessorContext context String queryId = HiveConf.getVar(jconf, HiveConf.ConfVars.HIVEQUERYID); cache = ObjectCacheFactory.getCache(jconf, 
queryId, true); + dynamicValueCache = ObjectCacheFactory.getCache(jconf, queryId, false); String cacheKey = processorContext.getTaskVertexName() + REDUCE_PLAN_KEY; cacheKeys = Lists.newArrayList(cacheKey); + dynamicValueCacheKeys = new ArrayList(); reduceWork = (ReduceWork) cache.retrieve(cacheKey, new Callable() { @Override public Object call() { @@ -169,6 +173,21 @@ void init( l4j.info("Memory available for operators set to {}", LlapUtil.humanReadableByteCount(memoryAvailableToTask)); } OperatorUtils.setMemoryAvailable(reducer.getChildOperators(), memoryAvailableToTask); + + // Setup values registry + String valueRegistryKey = DynamicValue.DYNAMIC_VALUE_REGISTRY_CACHE_KEY; + DynamicValueRegistryTez registryTez = dynamicValueCache.retrieve(valueRegistryKey, + new Callable() { + @Override + public DynamicValueRegistryTez call() { + return new DynamicValueRegistryTez(); + } + }); + dynamicValueCacheKeys.add(valueRegistryKey); + RegistryConfTez registryConf = new RegistryConfTez(jconf, reduceWork, processorContext, inputs); + registryTez.init(registryConf); + checkAbortCondition(); + if (numTags > 1) { sources = new ReduceRecordSource[numTags]; mainWorkOIs = new ObjectInspector[numTags]; @@ -348,6 +367,12 @@ void close(){ } } + if (dynamicValueCache != null && dynamicValueCacheKeys != null) { + for (String k: dynamicValueCacheKeys) { + dynamicValueCache.release(k); + } + } + try { for (ReduceRecordSource rs: sources) { abort = abort && rs.close(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 0cb6c8a..848fc8e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -153,7 +153,7 @@ public void assign(VectorExpressionWriter[] writers, List oids) VectorExpression vectorExpr = bigTableValueExpressions[i]; // This is a vectorized aware evaluator - ExprNodeEvaluator eval = new ExprNodeEvaluator(desc) { + ExprNodeEvaluator eval = new ExprNodeEvaluator(desc, hconf) { int columnIndex; int writerIndex; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index 80b0a14..ac3363e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -207,7 +207,7 @@ public void assign(VectorExpressionWriter[] writers, List oids) VectorExpression vectorExpr = bigTableValueExpressions[i]; // This is a vectorized aware evaluator - ExprNodeEvaluator eval = new ExprNodeEvaluator(desc) { + ExprNodeEvaluator eval = new ExprNodeEvaluator(desc, hconf) { int columnIndex;; int writerIndex; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index f6b6447..aeef5f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -94,9 +94,11 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.DynamicValue; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; 
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.udf.SettableUDF; @@ -585,6 +587,8 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpress } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), mode); + } else if (exprDesc instanceof ExprNodeDynamicValueDesc) { + ve = getDynamicValueVectorExpression((ExprNodeDynamicValueDesc) exprDesc, mode); } if (ve == null) { throw new HiveException( @@ -1094,6 +1098,21 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI } } + private VectorExpression getDynamicValueVectorExpression(ExprNodeDynamicValueDesc dynamicValueExpr, + VectorExpressionDescriptor.Mode mode) throws HiveException { + String typeName = dynamicValueExpr.getTypeInfo().getTypeName(); + VectorExpressionDescriptor.ArgumentType vectorArgType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(typeName); + if (vectorArgType == VectorExpressionDescriptor.ArgumentType.NONE) { + throw new HiveException("No vector argument type for type name " + typeName); + } + int outCol = -1; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + outCol = ocm.allocateOutputColumn(dynamicValueExpr.getTypeInfo()); + } + + return new DynamicValueVectorExpression(outCol, dynamicValueExpr.getTypeInfo(), dynamicValueExpr.getDynamicValue()); + } + /** * Used as a fast path for operations that don't modify their input, like unary + * and casting boolean to long. IdentityExpression and its children are always @@ -2234,6 +2253,12 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve } else if (child instanceof ExprNodeConstantDesc) { // this is a constant (or null) argDescs[i].setConstant((ExprNodeConstantDesc) child); + } else if (child instanceof ExprNodeDynamicValueDesc) { + VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION); + vectorExprs.add(e); + variableArgPositions.add(i); + exprResultColumnNums.add(e.getOutputColumn()); + argDescs[i].setVariable(e.getOutputColumn()); } else { throw new HiveException("Unable to use the VectorUDFAdaptor. Encountered unsupported expr desc : " + child); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java new file mode 100644 index 0000000..9051c54 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java @@ -0,0 +1,284 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.plan.DynamicValue; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Constant is represented as a vector with repeating values. + */ +public class DynamicValueVectorExpression extends VectorExpression { + private static final Logger LOG = LoggerFactory.getLogger(DynamicValueVectorExpression.class); + + private static final long serialVersionUID = 1L; + + DynamicValue dynamicValue; + TypeInfo typeInfo; + transient private boolean initialized = false; + + private int outputColumn; + protected long longValue = 0; + private double doubleValue = 0; + private byte[] bytesValue = null; + private HiveDecimal decimalValue = null; + private Timestamp timestampValue = null; + private HiveIntervalDayTime intervalDayTimeValue = null; + private boolean isNullValue = false; + + private ColumnVector.Type type; + private int bytesValueLength = 0; + + public DynamicValueVectorExpression() { + super(); + } + + public DynamicValueVectorExpression(int outputColumn, TypeInfo typeInfo, DynamicValue dynamicValue) { + this(); + this.outputColumn = outputColumn; + this.type = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + this.dynamicValue = dynamicValue; + this.typeInfo = typeInfo; + } + + private void evaluateLong(VectorizedRowBatch vrg) { + LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumn]; + cv.isRepeating = true; + cv.noNulls = !isNullValue; + if (!isNullValue) { + cv.vector[0] = longValue; + } else { + cv.isNull[0] = true; + } + } + + private void evaluateDouble(VectorizedRowBatch vrg) { + DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumn]; + cv.isRepeating = true; + cv.noNulls = !isNullValue; + if (!isNullValue) { + cv.vector[0] = doubleValue; + } else { + cv.isNull[0] = true; + } + } + + private void evaluateBytes(VectorizedRowBatch vrg) { + BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumn]; + cv.isRepeating = true; + cv.noNulls = !isNullValue; + cv.initBuffer(); + if (!isNullValue) { + cv.setVal(0, bytesValue, 0, bytesValueLength); + } else { + cv.isNull[0] = true; + } + } + + private void evaluateDecimal(VectorizedRowBatch vrg) { + DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumn]; + dcv.isRepeating = true; + dcv.noNulls = !isNullValue; + if (!isNullValue) { + dcv.vector[0].set(decimalValue); + } else { + dcv.isNull[0] = true; + } + } + + private void evaluateTimestamp(VectorizedRowBatch vrg) { + TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumn]; + dcv.isRepeating = true; + dcv.noNulls = !isNullValue; + if (!isNullValue) { + dcv.set(0, 
timestampValue); + } else { + dcv.isNull[0] = true; + } + } + + private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { + IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumn]; + dcv.isRepeating = true; + dcv.noNulls = !isNullValue; + if (!isNullValue) { + dcv.set(0, intervalDayTimeValue); + } else { + dcv.isNull[0] = true; + } + } + + private void initValue() { + Object val = dynamicValue.getValue(); + + if (val == null) { + isNullValue = true; + } else { + PrimitiveObjectInspector poi = dynamicValue.getObjectInspector(); + switch (type) { + case LONG: + longValue = PrimitiveObjectInspectorUtils.getLong(val, poi); + break; + case DOUBLE: + doubleValue = PrimitiveObjectInspectorUtils.getDouble(val, poi); + break; + case BYTES: + byte[] bytesVal; + bytesVal = PrimitiveObjectInspectorUtils.getString(val, poi).getBytes(); + setBytesValue(bytesVal); + break; + case DECIMAL: + decimalValue = PrimitiveObjectInspectorUtils.getHiveDecimal(val, poi); + break; + case TIMESTAMP: + timestampValue = PrimitiveObjectInspectorUtils.getTimestamp(val, poi); + break; + case INTERVAL_DAY_TIME: + intervalDayTimeValue = PrimitiveObjectInspectorUtils.getHiveIntervalDayTime(val, poi); + break; + default: + throw new IllegalStateException("Unsupported type " + type); + } + } + + initialized = true; + } + + @Override + public void evaluate(VectorizedRowBatch vrg) { + if (!initialized) { + initValue(); + } + + switch (type) { + case LONG: + evaluateLong(vrg); + break; + case DOUBLE: + evaluateDouble(vrg); + break; + case BYTES: + evaluateBytes(vrg); + break; + case DECIMAL: + evaluateDecimal(vrg); + break; + case TIMESTAMP: + evaluateTimestamp(vrg); + break; + case INTERVAL_DAY_TIME: + evaluateIntervalDayTime(vrg); + break; + default: + throw new IllegalStateException("Unsupported type " + type); + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public long getLongValue() { + return longValue; + } + + public void setLongValue(long longValue) { + this.longValue = longValue; + } + + public double getDoubleValue() { + return doubleValue; + } + + public void setDoubleValue(double doubleValue) { + this.doubleValue = doubleValue; + } + + public byte[] getBytesValue() { + return bytesValue; + } + + public void setBytesValue(byte[] bytesValue) { + this.bytesValue = bytesValue.clone(); + this.bytesValueLength = bytesValue.length; + } + + public void setDecimalValue(HiveDecimal decimalValue) { + this.decimalValue = decimalValue; + } + + public HiveDecimal getDecimalValue() { + return decimalValue; + } + + public void setTimestampValue(Timestamp timestampValue) { + this.timestampValue = timestampValue; + } + + public Timestamp getTimestampValue() { + return timestampValue; + } + + public void setIntervalDayTimeValue(HiveIntervalDayTime intervalDayTimeValue) { + this.intervalDayTimeValue = intervalDayTimeValue; + } + + public HiveIntervalDayTime getIntervalDayTimeValue() { + return intervalDayTimeValue; + } + + public String getTypeString() { + return getOutputType(); + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()).build(); + } + + public DynamicValue getDynamicValue() { + return dynamicValue; + } + + public void setDynamicValue(DynamicValue dynamicValue) { + this.dynamicValue = dynamicValue; + } + + public TypeInfo getTypeInfo() { + return typeInfo; + } + + public void 
setTypeInfo(TypeInfo typeInfo) { + this.typeInfo = typeInfo; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 9d900e4..997334b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -27,9 +27,11 @@ import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; +import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; @@ -58,14 +60,16 @@ public class ConvertAstToSearchArg { private static final Logger LOG = LoggerFactory.getLogger(ConvertAstToSearchArg.class); - private final SearchArgument.Builder builder = - SearchArgumentFactory.newBuilder(); + private final SearchArgument.Builder builder; + private final Configuration conf; /** * Builds the expression and leaf list from the original predicate. * @param expression the expression to translate. */ - ConvertAstToSearchArg(ExprNodeGenericFuncDesc expression) { + ConvertAstToSearchArg(Configuration conf, ExprNodeGenericFuncDesc expression) { + this.conf = conf; + builder = SearchArgumentFactory.newBuilder(conf); parse(expression); } @@ -182,7 +186,7 @@ private static Object boxLiteral(ExprNodeConstantDesc constantDesc, * @param type the type of the expression * @return the literal boxed if found or null */ - private static Object findLiteral(ExprNodeGenericFuncDesc expr, + private static Object findLiteral(Configuration conf, ExprNodeGenericFuncDesc expr, PredicateLeaf.Type type) { List children = expr.getChildren(); if (children.size() != 2) { @@ -190,16 +194,29 @@ private static Object findLiteral(ExprNodeGenericFuncDesc expr, } Object result = null; for(ExprNodeDesc child: children) { - if (child instanceof ExprNodeConstantDesc) { + Object currentResult = getLiteral(conf, child, type); + if (currentResult != null) { + // Both children in the expression should not be literal if (result != null) { return null; } - result = boxLiteral((ExprNodeConstantDesc) child, type); + result = currentResult; } } return result; } + private static Object getLiteral(Configuration conf, ExprNodeDesc child, PredicateLeaf.Type type) { + if (child instanceof ExprNodeConstantDesc) { + return boxLiteral((ExprNodeConstantDesc) child, type); + } else if (child instanceof ExprNodeDynamicValueDesc) { + LiteralDelegate value = ((ExprNodeDynamicValueDesc) child).getDynamicValue(); + value.setConf(conf); + return value; + } + return null; + } + /** * Return the boxed literal at the given position * @param expr the parent node @@ -207,15 +224,12 @@ private static Object findLiteral(ExprNodeGenericFuncDesc expr, * @param position the child position to check * @return the boxed literal if found otherwise null */ - private static Object getLiteral(ExprNodeGenericFuncDesc expr, + private static Object getLiteral(Configuration conf, ExprNodeGenericFuncDesc expr, PredicateLeaf.Type type, int position) { List children = 
expr.getChildren(); - Object child = children.get(position); - if (child instanceof ExprNodeConstantDesc) { - return boxLiteral((ExprNodeConstantDesc) child, type); - } - return null; + ExprNodeDesc child = children.get(position); + return getLiteral(conf, child, type); } private static Object[] getLiteralList(ExprNodeGenericFuncDesc expr, @@ -272,16 +286,16 @@ private void createLeaf(PredicateLeaf.Operator operator, builder.isNull(columnName, type); break; case EQUALS: - builder.equals(columnName, type, findLiteral(expression, type)); + builder.equals(columnName, type, findLiteral(conf, expression, type)); break; case NULL_SAFE_EQUALS: - builder.nullSafeEquals(columnName, type, findLiteral(expression, type)); + builder.nullSafeEquals(columnName, type, findLiteral(conf, expression, type)); break; case LESS_THAN: - builder.lessThan(columnName, type, findLiteral(expression, type)); + builder.lessThan(columnName, type, findLiteral(conf, expression, type)); break; case LESS_THAN_EQUALS: - builder.lessThanEquals(columnName, type, findLiteral(expression, type)); + builder.lessThanEquals(columnName, type, findLiteral(conf, expression, type)); break; case IN: builder.in(columnName, type, @@ -289,8 +303,8 @@ private void createLeaf(PredicateLeaf.Operator operator, break; case BETWEEN: builder.between(columnName, type, - getLiteral(expression, type, variable + 1), - getLiteral(expression, type, variable + 2)); + getLiteral(conf, expression, type, variable + 1), + getLiteral(conf, expression, type, variable + 2)); break; } } catch (Exception e) { @@ -425,8 +439,8 @@ private void parse(ExprNodeDesc expression) { public static final String SARG_PUSHDOWN = "sarg.pushdown"; - public static SearchArgument create(ExprNodeGenericFuncDesc expression) { - return new ConvertAstToSearchArg(expression).buildSearchArgument(); + public static SearchArgument create(Configuration conf, ExprNodeGenericFuncDesc expression) { + return new ConvertAstToSearchArg(conf, expression).buildSearchArgument(); } @@ -445,7 +459,7 @@ public static SearchArgument create(byte[] kryoBytes) { public static SearchArgument createFromConf(Configuration conf) { String sargString; if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) { - return create(SerializationUtilities.deserializeExpression(sargString)); + return create(conf, SerializationUtilities.deserializeExpression(sargString)); } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) { return create(sargString); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index beed6b8..fa5f054 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -30,23 +30,11 @@ import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; -import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; -import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; -import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; -import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.MuxOperator; -import 
org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.OperatorUtils; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; +import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.GenTezUtils; import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc; @@ -761,6 +749,14 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo } } + // If there is a semijoin optimization in the same operator pipeline, remove it + if (context.parseContext.getRsOpToTsOpMap().size() > 0) { + for (Operator parent : mapJoinOp.getParentOperators()) { + for (Node child : parent.getChildren()) { + GenTezUtils.removeUnderlyingSemijoinOps((Operator) child, context); + } + } + } return mapJoinOp; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index 26fcc45..110b589 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -29,12 +29,15 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -50,20 +53,17 @@ import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; -import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; -import org.apache.hadoop.hive.ql.plan.FilterDesc; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.*; 
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -148,15 +148,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje FilterOperator filter = (FilterOperator) nd; FilterDesc desc = filter.getConf(); - TableScanOperator ts = null; - if (!parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING) && !parseContext.getConf().getBoolVar(ConfVars.SPARK_DYNAMIC_PARTITION_PRUNING)) { // nothing to do when the optimization is off return null; } - DynamicPartitionPrunerContext removerContext = new DynamicPartitionPrunerContext(); + TableScanOperator ts = null; if (filter.getParentOperators().size() == 1 && filter.getParentOperators().get(0) instanceof TableScanOperator) { @@ -169,14 +167,32 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje LOG.debug("TableScan: " + ts); } + DynamicPartitionPrunerContext removerContext = new DynamicPartitionPrunerContext(); + // collect the dynamic pruning conditions removerContext.dynLists.clear(); collectDynamicPruningConditions(desc.getPredicate(), removerContext); + if (ts == null) { + // Replace the synthetic predicate with true and bail out + for (DynamicListContext ctx : removerContext) { + ExprNodeDesc constNode = + new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true); + replaceExprNode(ctx, desc, constNode); + } + return false; + } + + final boolean semiJoin = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION); + for (DynamicListContext ctx : removerContext) { String column = ExprNodeDescUtils.extractColName(ctx.parent); + boolean semiJoinAttempted = false; + + if (column != null) { + // Need unique IDs to refer to each min/max key value in the DynamicValueRegistry + String keyBaseAlias = ""; - if (ts != null && column != null) { Table table = ts.getConf().getTableMetadata(); if (table != null && table.isPartitionKey(column)) { @@ -203,20 +219,51 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje } } else { LOG.debug("Column " + column + " is not a partition column"); + if (semiJoin) { + LOG.debug("Initiate min/max - bloom filter reduction for " + column); + // Get the table name from which the min-max values will come. + Operator op = ctx.generator; + while (!(op == null || op instanceof TableScanOperator)) { + op = op.getParentOperators().get(0); + } + String tableAlias = (op == null ? 
"" : ((TableScanOperator) op).getConf().getAlias()); + keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + column; + + generateSemiJoinOperatorPlan(ctx, parseContext, ts, keyBaseAlias); + semiJoinAttempted = true; + } } - } - // we always remove the condition by replacing it with "true" - ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true); - if (ctx.grandParent == null) { - desc.setPredicate(constNode); + // If semijoin is attempted then replace the condition with a min-max filter + // else, + // we always remove the condition by replacing it with "true" + ExprNodeDesc replaceNode = null; + if (semiJoinAttempted) { + List betweenArgs = new ArrayList(); + betweenArgs.add(new ExprNodeConstantDesc(Boolean.FALSE)); // Do not invert between result + // add column expression here + betweenArgs.add(ctx.parent.getChildren().get(0)); + betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_min", ctx.desc.getTypeInfo()))); + betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_max", ctx.desc.getTypeInfo()))); + replaceNode = ExprNodeGenericFuncDesc.newInstance( + FunctionRegistry.getFunctionInfo("between").getGenericUDF(), betweenArgs); + } else { + replaceNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true); + } + replaceExprNode(ctx, desc, replaceNode); + // Revisit this + // In some cases, the predicate may not be pushed down to, + // tablescan Op, set it in Op if needed + if (semiJoinAttempted && ts.getConf().getFilterExpr() == null && + filter.getConf().getPredicate() != null) { + ts.getConf().setFilterExpr((ExprNodeGenericFuncDesc) filter.getConf().getPredicate()); + } } else { - int i = ctx.grandParent.getChildren().indexOf(ctx.parent); - ctx.grandParent.getChildren().remove(i); - ctx.grandParent.getChildren().add(i, constNode); + ExprNodeDesc constNode = + new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true); + replaceExprNode(ctx, desc, constNode); } } - // if we pushed the predicate into the table scan we need to remove the // synthetic conditions there. cleanTableScanFilters(ts); @@ -224,6 +271,16 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje return false; } + private void replaceExprNode(DynamicListContext ctx, FilterDesc desc, ExprNodeDesc node) { + if (ctx.grandParent == null) { + desc.setPredicate(node); + } else { + int i = ctx.grandParent.getChildren().indexOf(ctx.parent); + ctx.grandParent.getChildren().remove(i); + ctx.grandParent.getChildren().add(i, node); + } + } + private void cleanTableScanFilters(TableScanOperator ts) throws SemanticException { if (ts == null || ts.getConf() == null || ts.getConf().getFilterExpr() == null) { @@ -327,6 +384,157 @@ private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext pars } } + // Generates plan for min/max when dynamic partition pruning is ruled out. + private void generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContext parseContext, + TableScanOperator ts, String keyBaseAlias) throws SemanticException { + + // we will put a fork in the plan at the source of the reduce sink + Operator parentOfRS = ctx.generator.getParentOperators().get(0); + + // we need the expr that generated the key of the reduce sink + ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); + + List keyExprs = new ArrayList(); + keyExprs.add(key); + + // group by requires "ArrayList", don't ask. 
+ ArrayList outputNames = new ArrayList(); + outputNames.add(HiveConf.getColumnInternalName(0)); + + // project the relevant key column + SelectDesc select = new SelectDesc(keyExprs, outputNames); + SelectOperator selectOp = + (SelectOperator) OperatorFactory.getAndMakeChild(select, parentOfRS); + + // do a group by on the list to dedup + float groupByMemoryUsage = + HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); + float memoryThreshold = + HiveConf.getFloatVar(parseContext.getConf(), + HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + + ArrayList groupByExprs = new ArrayList(); + + // Add min/max aggregations + List aggFnOIs = new ArrayList(); + aggFnOIs.add(key.getWritableObjectInspector()); + ArrayList params = new ArrayList(); + params.add( + new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), + "", false)); + + ArrayList aggs = new ArrayList(); + try { + AggregationDesc min = new AggregationDesc("min", + FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false), + params, false, Mode.PARTIAL1); + AggregationDesc max = new AggregationDesc("max", + FunctionRegistry.getGenericUDAFEvaluator("max", aggFnOIs, false, false), + params, false, Mode.PARTIAL1); + aggs.add(min); + aggs.add(max); + } catch (SemanticException e) { + LOG.error("Error creating min/max aggregations on key", e); + throw new IllegalStateException("Error creating min/max aggregations on key", e); + } + + // Create the Group by Operator + ArrayList gbOutputNames = new ArrayList(); + gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0)); + gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1)); + GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, + gbOutputNames, new ArrayList(), aggs, false, + groupByMemoryUsage, memoryThreshold, null, false, 0, false); + + GroupByOperator groupByOp = (GroupByOperator)OperatorFactory.getAndMakeChild( + groupBy, selectOp); + + groupByOp.setColumnExprMap(new HashMap()); + + // Get the column names of the aggregations for reduce sink + int colPos = 0; + ArrayList rsValueCols = new ArrayList(); + for (AggregationDesc agg : aggs) { + ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(key.getTypeInfo(), + gbOutputNames.get(colPos++), "", false); + rsValueCols.add(colExpr); + } + + // Create the reduce sink operator + ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc( + new ArrayList(), rsValueCols, gbOutputNames, false, + -1, 0, 1, Operation.NOT_ACID); + ReduceSinkOperator rsOp = (ReduceSinkOperator)OperatorFactory.getAndMakeChild(rsDesc, groupByOp); + Map columnExprMap = new HashMap(); + rsOp.setColumnExprMap(columnExprMap); + + // Create the final Group By Operator + ArrayList aggsFinal = new ArrayList(); + try { + List minFinalFnOIs = new ArrayList(); + List maxFinalFnOIs = new ArrayList(); + ArrayList minFinalParams = new ArrayList(); + ArrayList maxFinalParams = new ArrayList(); + // Use the expressions from Reduce Sink. + minFinalFnOIs.add(rsValueCols.get(0).getWritableObjectInspector()); + maxFinalFnOIs.add(rsValueCols.get(1).getWritableObjectInspector()); + // Coming from a ReduceSink the aggregations would be in the form VALUE._col0, VALUE._col1 + minFinalParams.add( + new ExprNodeColumnDesc( + rsValueCols.get(0).getTypeInfo(), + Utilities.ReduceField.VALUE + "." + + gbOutputNames.get(0), "", false)); + maxFinalParams.add( + new ExprNodeColumnDesc( + rsValueCols.get(1).getTypeInfo(), + Utilities.ReduceField.VALUE + "." 
+ + gbOutputNames.get(1), "", false)); + AggregationDesc min = new AggregationDesc("min", + FunctionRegistry.getGenericUDAFEvaluator("min", minFinalFnOIs, + false, false), + minFinalParams, false, Mode.FINAL); + AggregationDesc max = new AggregationDesc("max", + FunctionRegistry.getGenericUDAFEvaluator("max", maxFinalFnOIs, + false, false), + maxFinalParams, false, Mode.FINAL); + aggsFinal.add(min); + aggsFinal.add(max); + } catch (SemanticException e) { + LOG.error("Error creating min/max aggregations on key", e); + throw new IllegalStateException("Error creating min/max aggregations on key", e); + } + + GroupByDesc groupByDescFinal = new GroupByDesc(GroupByDesc.Mode.FINAL, + gbOutputNames, new ArrayList(), aggsFinal, false, + groupByMemoryUsage, memoryThreshold, null, false, 0, false); + GroupByOperator groupByOpFinal = (GroupByOperator)OperatorFactory.getAndMakeChild( + groupByDescFinal, rsOp); + groupByOpFinal.setColumnExprMap(new HashMap()); + + // Create the final Reduce Sink Operator + ReduceSinkDesc rsDescFinal = PlanUtils.getReduceSinkDesc( + new ArrayList(), rsValueCols, gbOutputNames, false, + -1, 0, 1, Operation.NOT_ACID); + ReduceSinkOperator rsOpFinal = (ReduceSinkOperator)OperatorFactory.getAndMakeChild(rsDescFinal, groupByOpFinal); + LOG.debug("DynamicMinMaxPushdown: Saving RS to TS mapping: " + rsOpFinal + ": " + ts); + rsOpFinal.setColumnExprMap(columnExprMap); + parseContext.getRsOpToTsOpMap().put(rsOpFinal, ts); + + // Save the info that is required at query time to resolve dynamic/runtime values. + RuntimeValuesInfo runtimeValuesInfo = new RuntimeValuesInfo(); + TableDesc rsFinalTableDesc = PlanUtils.getReduceValueTableDesc( + PlanUtils.getFieldSchemasFromColumnList(rsValueCols, "_col")); + List dynamicValueIDs = new ArrayList(); + dynamicValueIDs.add(keyBaseAlias + "_min"); + dynamicValueIDs.add(keyBaseAlias + "_max"); + + runtimeValuesInfo.setTableDesc(rsFinalTableDesc); + runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs); + runtimeValuesInfo.setColExprs(rsValueCols); + parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo); + + } + private Map collectDynamicPruningConditions(ExprNodeDesc pred, NodeProcessorCtx ctx) throws SemanticException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java index 9e9beb0..b853a06 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java @@ -135,7 +135,7 @@ protected void generatePredicate(NodeProcessorCtx procCtx, return; } // the sargs are closely tied to hive.optimize.index.filter - SearchArgument sarg = ConvertAstToSearchArg.create(filter); + SearchArgument sarg = ConvertAstToSearchArg.create(ctxt.pctx.getConf(), filter); if (sarg == null) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RedundantDynamicPruningConditionsRemoval.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RedundantDynamicPruningConditionsRemoval.java index d9ce017..b8a60f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RedundantDynamicPruningConditionsRemoval.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RedundantDynamicPruningConditionsRemoval.java @@ -24,6 +24,7 @@ import java.util.Stack; import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FilterOperator; import 
org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -75,16 +76,19 @@ */ @Override public ParseContext transform(ParseContext pctx) throws SemanticException { - Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" + - FilterOperator.getOperatorName() + "%"), new FilterTransformer()); - - Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); - GraphWalker ogw = new DefaultGraphWalker(disp); - - List topNodes = new ArrayList(); - topNodes.addAll(pctx.getTopOps().values()); - ogw.startWalking(topNodes, null); + // Make sure semijoin is not enabled. If it is, then do not remove the dynamic partition pruning predicates. + if (!pctx.getConf().getBoolVar(HiveConf.ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" + + FilterOperator.getOperatorName() + "%"), new FilterTransformer()); + + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); + GraphWalker ogw = new DefaultGraphWalker(disp); + + List topNodes = new ArrayList(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + } return pctx; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index aa1e509..4536029 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -51,23 +51,8 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; -import org.apache.hadoop.hive.ql.plan.ColStatistics; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.*; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; -import org.apache.hadoop.hive.ql.plan.JoinCondDesc; -import org.apache.hadoop.hive.ql.plan.JoinDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -494,6 +479,12 @@ private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateSt final ExprNodeDesc leftExpression = fd.getChildren().get(2); // left expression final ExprNodeDesc rightExpression = fd.getChildren().get(3); // right expression + // Short circuit and return the current number of rows if this is a + // synthetic predicate with dynamic values + if (leftExpression instanceof ExprNodeDynamicValueDesc) { + return stats.getNumRows(); 
+ } + // We transform the BETWEEN clause to AND clause (with NOT on top in invert is true). // This is more straightforward, as the evaluateExpression method will deal with // generating the final row count relying on the basic comparator evaluation methods @@ -1191,20 +1182,22 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // if UDAFs are present, new columns needs to be added if (!aggDesc.isEmpty() && stats != null) { List aggColStats = Lists.newArrayList(); - for (ColumnInfo ci : rs.getSignature()) { - - // if the columns in row schema is not contained in column - // expression map, then those are the aggregate columns that - // are added GBY operator. we will estimate the column statistics - // for those newly added columns - if (!colExprMap.containsKey(ci.getInternalName())) { - String colName = ci.getInternalName(); - String colType = ci.getTypeName(); - ColStatistics cs = new ColStatistics(colName, colType); - cs.setCountDistint(stats.getNumRows()); - cs.setNumNulls(0); - cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType)); - aggColStats.add(cs); + if (rs != null) { + for (ColumnInfo ci : rs.getSignature()) { + + // if the columns in row schema is not contained in column + // expression map, then those are the aggregate columns that + // are added GBY operator. we will estimate the column statistics + // for those newly added columns + if (!colExprMap.containsKey(ci.getInternalName())) { + String colName = ci.getInternalName(); + String colType = ci.getTypeName(); + ColStatistics cs = new ColStatistics(colName, colType); + cs.setCountDistint(stats.getNumRows()); + cs.setNumNulls(0); + cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType)); + aggColStats.add(cs); + } } } @@ -2244,27 +2237,29 @@ static void updateStats(Statistics stats, long newNumRows, if (useColStats) { List colStats = stats.getColumnStats(); - for (ColStatistics cs : colStats) { - long oldNumNulls = cs.getNumNulls(); - long oldDV = cs.getCountDistint(); - long newNumNulls = Math.round(ratio * oldNumNulls); - cs.setNumNulls(newNumNulls); - if (updateNDV) { - long newDV = oldDV; - - // if ratio is greater than 1, then number of rows increases. This can happen - // when some operators like GROUPBY duplicates the input rows in which case - // number of distincts should not change. Update the distinct count only when - // the output number of rows is less than input number of rows. - if (ratio <= 1.0) { - newDV = (long) Math.ceil(ratio * oldDV); + if (colStats != null) { + for (ColStatistics cs : colStats) { + long oldNumNulls = cs.getNumNulls(); + long oldDV = cs.getCountDistint(); + long newNumNulls = Math.round(ratio * oldNumNulls); + cs.setNumNulls(newNumNulls); + if (updateNDV) { + long newDV = oldDV; + + // if ratio is greater than 1, then number of rows increases. This can happen + // when some operators like GROUPBY duplicates the input rows in which case + // number of distincts should not change. Update the distinct count only when + // the output number of rows is less than input number of rows. 
+ if (ratio <= 1.0) { + newDV = (long) Math.ceil(ratio * oldDV); + } + cs.setCountDistint(newDV); } - cs.setCountDistint(newDV); } + stats.setColumnStats(colStats); + long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats); + stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize)); } - stats.setColumnStats(colStats); - long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats); - stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize)); } else { long newDataSize = (long) (ratio * stats.getDataSize()); stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index e2363eb..07893bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -20,13 +20,7 @@ import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.AUTOPARALLEL; -import java.util.ArrayList; -import java.util.Deque; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; +import java.util.*; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -42,18 +36,12 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.lib.*; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; -import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.ReduceWork; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.plan.TezEdgeProperty; +import org.apache.hadoop.hive.ql.plan.*; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; -import org.apache.hadoop.hive.ql.plan.TezWork; -import org.apache.hadoop.hive.ql.plan.UnionWork; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -269,6 +257,15 @@ public static void removeUnionOperators(GenTezProcContext context, BaseWork work ((DynamicPruningEventDesc) event.getConf()).setTableScan((TableScanOperator) newRoot); } } + // This TableScanOperator could be part of semijoin optimization. + Map rsOpToTsOpMap = + context.parseContext.getRsOpToTsOpMap(); + for (ReduceSinkOperator rs : rsOpToTsOpMap.keySet()) { + if (rsOpToTsOpMap.get(rs) == orig) { + rsOpToTsOpMap.put(rs, (TableScanOperator) newRoot); + break; + } + } } context.rootToWorkMap.remove(orig); context.rootToWorkMap.put(newRoot, work); @@ -479,12 +476,15 @@ private static void findRoots(Operator op, List> ops) { * Remove an operator branch. When we see a fork, we know it's time to do the removal. 
* @param event the leaf node of which branch to be removed */ - public static void removeBranch(AppMasterEventOperator event) { + public static void removeBranch(Operator event) { Operator child = event; Operator curr = event; while (curr.getChildOperators().size() <= 1) { child = curr; + if (curr.getParentOperators().size() == 0) { + return; + } curr = curr.getParentOperators().get(0); } @@ -507,4 +507,183 @@ public static EdgeType determineEdgeType(BaseWork preceedingWork, BaseWork follo } return EdgeType.SIMPLE_EDGE; } + + public static void processDynamicMinMaxPushDownOperator( + GenTezProcContext procCtx, RuntimeValuesInfo runtimeValuesInfo, + ReduceSinkOperator rs) + throws SemanticException { + TableScanOperator ts = procCtx.parseContext.getRsOpToTsOpMap().get(rs); + + List rsWorkList = procCtx.childToWorkMap.get(rs); + if (ts == null || rsWorkList == null) { + // This happens when the ReduceSink's edge has been removed by cycle + // detection logic. Nothing to do here. + return; + } + LOG.debug("ReduceSink " + rs + " to TableScan " + ts); + + if (rsWorkList.size() != 1) { + StringBuilder sb = new StringBuilder(); + for (BaseWork curWork : rsWorkList) { + if (sb.length() > 0) { + sb.append(", "); + } + sb.append(curWork.getName()); + } + throw new SemanticException(rs + " belongs to multiple BaseWorks: " + sb.toString()); + } + + BaseWork parentWork = rsWorkList.get(0); + BaseWork childWork = procCtx.rootToWorkMap.get(ts); + + if (childWork instanceof MergeJoinWork) { + // This is a case of SMB join, no need for this optimization, remove it and bail out! + LOG.debug("Removing the Dynamic Semijoin Optimization from " + rs + " to " + ts); + removeBranch(rs); + removeSemiJoinOperator(procCtx.parseContext, rs, ts); + return; + } + + // Connect parent/child work with a broadcast edge. + LOG.debug("Connecting BaseWork - " + parentWork.getName() + " to " + childWork.getName()); + TezEdgeProperty edgeProperty = new TezEdgeProperty(EdgeType.BROADCAST_EDGE); + TezWork tezWork = procCtx.currentTask.getWork(); + tezWork.connect(parentWork, childWork, edgeProperty); + + // Set output names in ReduceSink + rs.getConf().setOutputName(childWork.getName()); + + // Set up the dynamic values in the childWork.
+ RuntimeValuesInfo childRuntimeValuesInfo = + new RuntimeValuesInfo(); + childRuntimeValuesInfo.setTableDesc(runtimeValuesInfo.getTableDesc()); + childRuntimeValuesInfo.setDynamicValueIDs(runtimeValuesInfo.getDynamicValueIDs()); + childRuntimeValuesInfo.setColExprs(runtimeValuesInfo.getColExprs()); + ((MapWork) childWork).setInputSourceToRuntimeValuesInfo( + parentWork.getName(), childRuntimeValuesInfo); + } + + // Functionality to remove semi-join optimization + public static void removeSemiJoinOperator(ParseContext context, + ReduceSinkOperator rs, + TableScanOperator ts) throws SemanticException{ + // Cleanup the synthetic predicate in the tablescan operator by + // replacing it with "true" + LOG.debug("Removing ReduceSink " + rs + " and TableScan " + ts); + ExprNodeDesc constNode = new ExprNodeConstantDesc( + TypeInfoFactory.booleanTypeInfo, Boolean.TRUE); + DynamicValuePredicateContext filterDynamicValuePredicatesCollection = + new DynamicValuePredicateContext(); + collectDynamicValuePredicates(ts.getConf().getFilterExpr(), + filterDynamicValuePredicatesCollection); + for (ExprNodeDesc nodeToRemove : filterDynamicValuePredicatesCollection + .childParentMapping.keySet()) { + // Find out if this synthetic predicate belongs to the current cycle + boolean skip = true; + for (ExprNodeDesc expr : nodeToRemove.getChildren()) { + if (expr instanceof ExprNodeDynamicValueDesc ) { + String dynamicValueIdFromExpr = ((ExprNodeDynamicValueDesc) expr) + .getDynamicValue().getId(); + String dynamicValueIdFromMap = context.getRsToRuntimeValuesInfoMap() + .get(rs).getDynamicValueIDs().get(0); + if (dynamicValueIdFromExpr.equals(dynamicValueIdFromMap)) { + // Intended predicate to be removed + skip = false; + break; + } + } + } + if (!skip) { + ExprNodeDesc nodeParent = filterDynamicValuePredicatesCollection + .childParentMapping.get(nodeToRemove); + if (nodeParent == null) { + // This was the only predicate, set filter expression to null + ts.getConf().setFilterExpr(null); + } else { + int i = nodeParent.getChildren().indexOf(nodeToRemove); + nodeParent.getChildren().remove(i); + nodeParent.getChildren().add(i, constNode); + } + context.getRsOpToTsOpMap().remove(rs); + // skip the rest of the predicates + break; + } + } + } + + private static class DynamicValuePredicateContext implements NodeProcessorCtx { + HashMap childParentMapping = new HashMap(); + } + + private static class DynamicValuePredicateProc implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + DynamicValuePredicateContext ctx = (DynamicValuePredicateContext) procCtx; + ExprNodeDesc parent = (ExprNodeDesc) stack.get(stack.size() - 2); + if (parent instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc parentFunc = (ExprNodeGenericFuncDesc) parent; + if (parentFunc.getGenericUDF() instanceof GenericUDFBetween) { + ExprNodeDesc grandParent = stack.size() >= 3 ? + (ExprNodeDesc) stack.get(stack.size() - 3) : null; + ctx.childParentMapping.put(parentFunc, grandParent); + } + } + + return null; + } + } + + private static void collectDynamicValuePredicates(ExprNodeDesc pred, NodeProcessorCtx ctx) throws SemanticException { + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. 
The dispatcher + // generates the plan from the operator tree + Map exprRules = new LinkedHashMap(); + exprRules.put(new RuleRegExp("R1", ExprNodeDynamicValueDesc.class.getName() + "%"), new DynamicValuePredicateProc()); + Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, ctx); + GraphWalker egw = new DefaultGraphWalker(disp); + List startNodes = new ArrayList(); + startNodes.add(pred); + + HashMap outputMap = new HashMap(); + egw.startWalking(startNodes, null); + } + + private static class SemijoinRemovalProc implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + ParseContext parseContext = ((OptimizeTezProcContext) procCtx).parseContext; + ReduceSinkOperator rs = (ReduceSinkOperator) nd; + if (parseContext.getRsOpToTsOpMap().containsKey(rs)) { + // The reducesink Op is part of the semijoin operation, remove it. + removeBranch(rs); + removeSemiJoinOperator(parseContext, rs, parseContext.getRsOpToTsOpMap().get(rs)); + } + return null; + } + } + + /** + * Removes all the semijoin optimizations underlying op + * @param op Operator to be examined. + */ + public static void removeUnderlyingSemijoinOps(Operator op, NodeProcessorCtx ctx) + throws SemanticException { + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. The dispatcher + // generates the plan from the operator tree + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() + "%"), + new SemijoinRemovalProc()); + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx); + GraphWalker ogw = new PreOrderOnceWalker(disp); + List startNodes = new ArrayList(); + startNodes.add(op); + + HashMap outputMap = new HashMap(); + ogw.startWalking(startNodes, null); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index 35f34da..3f9f76c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; +import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.CreateViewDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -125,6 +126,11 @@ private boolean needViewColumnAuthorization; private Set acidFileSinks = Collections.emptySet(); + // Map to store mapping between reduce sink Operator and TS Operator for semijoin + private Map rsOpToTsOpMap = + new HashMap(); + private Map rsToRuntimeValuesInfo = + new HashMap(); public ParseContext() { } @@ -652,4 +658,19 @@ private static void getAllOps(List builder, Set visited, Ope } } + public void setRsToRuntimeValuesInfoMap(Map rsToRuntimeValuesInfo) { + this.rsToRuntimeValuesInfo = rsToRuntimeValuesInfo; + } + + public Map getRsToRuntimeValuesInfoMap() { + return rsToRuntimeValuesInfo; + } + + public void setRsOpToTsOpMap(Map rsOpToTsOpMap) { + this.rsOpToTsOpMap = rsOpToTsOpMap; + } + + public Map getRsOpToTsOpMap() { + return rsOpToTsOpMap; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java new file mode 100644 index 0000000..e1f78f7 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java @@ -0,0 +1,44 @@ +package org.apache.hadoop.hive.ql.parse; + +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; + +import java.io.Serializable; +import java.util.List; + +/** + * Holds structures required for runtime values and mappings. + */ +public class RuntimeValuesInfo implements Serializable { + private static final long serialVersionUID = 1L; + + private TableDesc tableDesc; + private List dynamicValueIDs; + private List colExprs; + + // get-set methods + public TableDesc getTableDesc() { + return tableDesc; + } + + public void setTableDesc(TableDesc tableDesc) { + this.tableDesc = tableDesc; + } + + public List getDynamicValueIDs() { + return dynamicValueIDs; + } + + public void setDynamicValueIDs(List dynamicValueIDs) { + this.dynamicValueIDs = dynamicValueIDs; + } + + public List getColExprs() { + return colExprs; + } + + public void setColExprs(List colExprs) { + this.colExprs = colExprs; + } +} + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index e8b003e..5f9ccc8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -530,6 +530,9 @@ public ParseContext getParseContext(ParseContext pCtx, List> component, OptimizeTezProcContext context) { - AppMasterEventOperator victim = null; + private void removeCycleOperator(Set> component, OptimizeTezProcContext context) throws SemanticException { + AppMasterEventOperator victimAM = null; + TableScanOperator victimTS = null; + ReduceSinkOperator victimRS = null; + for (Operator o : component) { + // Look for AppMasterEventOperator or ReduceSinkOperator if (o instanceof AppMasterEventOperator) { - if (victim == null - || o.getConf().getStatistics().getDataSize() < victim.getConf().getStatistics() + if (victimAM == null + || o.getStatistics().getDataSize() < victimAM.getStatistics() .getDataSize()) { - victim = (AppMasterEventOperator) o; + victimAM = (AppMasterEventOperator) o; + } + } else if (o instanceof ReduceSinkOperator) { + TableScanOperator ts = context.parseContext.getRsOpToTsOpMap().get(o); + if (ts == null) { + continue; + } + // Sanity check + assert component.contains(ts); + + if (victimRS == null || + ts.getStatistics().getDataSize() < + victimTS.getStatistics().getDataSize()) { + victimRS = (ReduceSinkOperator) o; + victimTS = ts; } } } + // Always set the min/max optimization as victim. + Operator victim = victimRS; + + if (victimRS == null && victimAM != null) { + victim = victimAM; + } else if (victimAM == null) { + // do nothing + } else { + // Cycle consists of at least one dynamic partition pruning (DPP) + // optimization and at least one min/max optimization. + // DPP is a better optimization unless it ends up scanning the + // bigger table for keys instead of the smaller table. + + // Get the parent TS of victimRS.
+ Operator op = victimRS; + while(!(op instanceof TableScanOperator)) { + op = op.getParentOperators().get(0); + } + if ((2 * op.getStatistics().getDataSize()) < + victimAM.getStatistics().getDataSize()) { + victim = victimAM; + } + } + if (victim == null || - (!context.pruningOpsRemovedByPriorOpt.isEmpty() && - context.pruningOpsRemovedByPriorOpt.contains(victim))) { + (!context.pruningOpsRemovedByPriorOpt.isEmpty() && + context.pruningOpsRemovedByPriorOpt.contains(victim))) { return; } GenTezUtils.removeBranch(victim); - // at this point we've found the fork in the op pipeline that has the pruning as a child plan. - LOG.info("Disabling dynamic pruning for: " - + ((DynamicPruningEventDesc) victim.getConf()).getTableScan().toString() - + ". Needed to break cyclic dependency"); + + if (victim == victimRS) { + GenTezUtils.removeSemiJoinOperator(context.parseContext, victimRS, victimTS); + if (victimAM == null && context.parseContext.getRsOpToTsOpMap().size() > 0) { + // Special handling for SMB joins. + // If the same operator pipeline contains a SMB join, then the other + // semijoin branch which is intact is of little use. Find out if there + // is SMB join and remove it as well. + SemijoinRemovalContext semijoinRemovalCtx = + new SemijoinRemovalContext(); + collectSemijoinOps(victimTS, semijoinRemovalCtx); + for (Operator parent : semijoinRemovalCtx.parents) { + GenTezUtils.removeUnderlyingSemijoinOps(parent, context); + } + } + } + return; } // Tarjan's algo @@ -209,7 +241,7 @@ private void removeEventOperator(Set> component, OptimizeTezProcCont for (Operator o : deque) { if (!indexes.containsKey(o)) { - connect(o, index, nodes, indexes, lowLinks, components); + connect(o, index, nodes, indexes, lowLinks, components, procCtx.parseContext); } } @@ -218,7 +250,7 @@ private void removeEventOperator(Set> component, OptimizeTezProcCont private void connect(Operator o, AtomicInteger index, Stack> nodes, Map, Integer> indexes, Map, Integer> lowLinks, - Set>> components) { + Set>> components, ParseContext parseContext) { indexes.put(o, index.get()); lowLinks.put(o, index.get()); @@ -232,13 +264,22 @@ private void connect(Operator o, AtomicInteger index, Stack> node TableScanOperator ts = ((DynamicPruningEventDesc) o.getConf()).getTableScan(); LOG.debug("Adding special edge: " + o.getName() + " --> " + ts.toString()); children.add(ts); + } else if (o instanceof ReduceSinkOperator){ + // min/max case + children = new ArrayList>(); + children.addAll(o.getChildOperators()); + TableScanOperator ts = parseContext.getRsOpToTsOpMap().get(o); + if (ts != null) { + LOG.debug("Adding special edge: " + o.getName() + " --> " + ts.toString()); + children.add(ts); + } } else { children = o.getChildOperators(); } for (Operator child : children) { if (!indexes.containsKey(child)) { - connect(child, index, nodes, indexes, lowLinks, components); + connect(child, index, nodes, indexes, lowLinks, components, parseContext); lowLinks.put(o, Math.min(lowLinks.get(o), lowLinks.get(child))); } else if (nodes.contains(child)) { lowLinks.put(o, Math.min(lowLinks.get(o), indexes.get(child))); @@ -401,6 +442,14 @@ protected void generateTaskTree(List> rootTasks, Pa GenTezUtils.processFileSink(procCtx, fileSink); } + // Connect any edges required for min/max pushdown + if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) { + for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) { + // Process min/max + GenTezUtils.processDynamicMinMaxPushDownOperator( + procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(rs), 
rs); + } + } // and finally we hook up any events that need to be sent to the tez AM LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events."); for (AppMasterEventOperator event : procCtx.eventOperatorSet) { @@ -528,4 +577,41 @@ protected void optimizeTaskPlan(List> rootTasks, Pa perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan"); return; } + + private static class SemijoinRemovalContext implements NodeProcessorCtx { + List> parents = new ArrayList>(); + } + + private static class SemijoinRemovalProc implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + SemijoinRemovalContext ctx = (SemijoinRemovalContext) procCtx; + Operator parent = (Operator) stack.get(stack.size() - 2); + ctx.parents.add(parent); + return null; + } + } + + private static void collectSemijoinOps(Operator ts, NodeProcessorCtx ctx) throws SemanticException { + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. The dispatcher + // generates the plan from the operator tree + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%" + + TezDummyStoreOperator.getOperatorName() + "%"), + new SemijoinRemovalProc()); + opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + "%" + + CommonMergeJoinOperator.getOperatorName() + "%"), + new SemijoinRemovalProc()); + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx); + GraphWalker ogw = new PreOrderOnceWalker(disp); + List startNodes = new ArrayList(); + startNodes.add(ts); + + HashMap outputMap = new HashMap(); + ogw.startWalking(startNodes, null); + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 13a0811..8c341fc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedList; import java.util.LinkedHashSet; import java.util.List; @@ -32,6 +33,7 @@ import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -76,6 +78,10 @@ public BaseWork(String name) { private int reservedMemoryMB = -1; // default to -1 means we leave it up to Tez to decide + // Used for value registry + private Map inputSourceToRuntimeValuesInfo = + new HashMap(); + public void setGatheringStats(boolean gatherStats) { this.gatheringStats = gatherStats; } @@ -251,4 +257,13 @@ public void addSortCols(List sortCols) { public List getSortCols() { return sortColNames; } + + public Map getInputSourceToRuntimeValuesInfo() { + return inputSourceToRuntimeValuesInfo; + } + + public void setInputSourceToRuntimeValuesInfo( + String workName, RuntimeValuesInfo runtimeValuesInfo) { + inputSourceToRuntimeValuesInfo.put(workName, runtimeValuesInfo); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java new file mode 
100644 index 0000000..874c62b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.DynamicValueRegistry; +import org.apache.hadoop.hive.ql.exec.ObjectCache; +import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import java.io.Serializable; + + +public class DynamicValue implements LiteralDelegate, Serializable { + + private static final long serialVersionUID = 1L; + + public static final String DYNAMIC_VALUE_REGISTRY_CACHE_KEY = "DynamicValueRegistry"; + + protected transient Configuration conf; + + protected String id; + TypeInfo typeInfo; + PrimitiveObjectInspector objectInspector; + + transient protected Object val; + transient boolean initialized = false; + + public DynamicValue(String id, TypeInfo typeInfo) { + this.id = id; + this.typeInfo = typeInfo; + this.objectInspector = (PrimitiveObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo); + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } + + public TypeInfo getTypeInfo() { + return typeInfo; + } + + public void setTypeInfo(TypeInfo typeInfo) { + this.typeInfo = typeInfo; + } + + public PrimitiveObjectInspector getObjectInspector() { + return objectInspector; + } + + public void setObjectInspector(PrimitiveObjectInspector objectInspector) { + this.objectInspector = objectInspector; + } + + @Override + public String getId() { return id;} + + public void setId(String id) { + this.id = id; + } + + @Override + public Object getLiteral() { + return getJavaValue(); + } + + public Object getJavaValue() { + return objectInspector.getPrimitiveJavaObject(getValue()); + } + + public Object getWritableValue() { + return objectInspector.getPrimitiveWritableObject(getValue()); + } + + public Object getValue() { + if (initialized) { + return val; + } + + if (conf == null) { + throw new IllegalStateException("Cannot retrieve dynamic value " + id + " - no conf set"); + } + + try { + // Get object cache + String queryId = HiveConf.getVar(conf, 
HiveConf.ConfVars.HIVEQUERYID); + ObjectCache cache = ObjectCacheFactory.getCache(conf, queryId, false); + + // Get the registry + DynamicValueRegistry valueRegistry = cache.retrieve(DYNAMIC_VALUE_REGISTRY_CACHE_KEY); + if (valueRegistry == null) { + throw new IllegalStateException("DynamicValueRegistry not available"); + } + val = valueRegistry.getValue(id); + initialized = true; + } catch (Exception err) { + throw new IllegalStateException("Failed to retrieve dynamic value for " + id, err); + } + + return val; + } + + @Override + public String toString() { + // If the id is a generated unique ID then this could affect .q file golden files for tests that run EXPLAIN queries. + return "DynamicValue(" + id + ")"; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDynamicValueDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDynamicValueDesc.java new file mode 100644 index 0000000..c9e7b67 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDynamicValueDesc.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + + +/** + * This expression represents a value that will be available at runtime. + * + */ +public class ExprNodeDynamicValueDesc extends ExprNodeDesc implements Serializable { + + private static final long serialVersionUID = 1L; + + protected DynamicValue dynamicValue; + + public ExprNodeDynamicValueDesc() { + } + + public ExprNodeDynamicValueDesc(DynamicValue value) { + super(value.getTypeInfo()); + this.dynamicValue = value; + } + + @Override + public ExprNodeDesc clone() { + return new ExprNodeDynamicValueDesc(dynamicValue); + } + + @Override + public boolean isSame(Object o) { + if (o instanceof ExprNodeDynamicValueDesc) { + Object otherValue = ((ExprNodeDynamicValueDesc) o).getDynamicValue(); + if (dynamicValue == null) { + return otherValue == null; + } + return dynamicValue.equals(otherValue); + } + return false; + } + + public DynamicValue getDynamicValue() { + return dynamicValue; + } + + public void setValue(DynamicValue value) { + this.dynamicValue = value; + } + + @Override + public String getExprString() { + return dynamicValue != null ? dynamicValue.toString() : "null dynamic literal"; + } + + @Override + public String toString() { + return dynamicValue != null ? 
dynamicValue.toString() : "null dynamic literal"; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java index 93b50a6..6563290 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.Set; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; @@ -44,6 +45,8 @@ */ public class TestConvertAstToSearchArg { + private final Configuration conf = new Configuration(); + private static void assertNoSharedNodes(ExpressionTree tree, Set seen ) throws Exception { @@ -547,7 +550,7 @@ public void testExpression1() throws Exception { " \n"; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(9, leaves.size()); @@ -836,7 +839,7 @@ public void testExpression2() throws Exception { " \n"; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(4, leaves.size()); @@ -1269,7 +1272,7 @@ public void testExpression3() throws Exception { " \n"; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(3, leaves.size()); @@ -1493,7 +1496,7 @@ id in (34,50) */ "\n"; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(3, leaves.size()); @@ -1763,7 +1766,7 @@ public void testExpression5() throws Exception { " \n"; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(1, leaves.size()); @@ -2246,7 +2249,7 @@ public void testExpression7() throws Exception { ""; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(9, leaves.size()); @@ -2405,7 +2408,7 @@ public void testExpression8() throws Exception { " "; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(0, leaves.size()); @@ -2538,7 +2541,7 @@ public void testExpression9() throws Exception { " "; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(0, leaves.size()); @@ -2663,7 +2666,7 @@ public void 
testExpression10() throws Exception { ""; SearchArgumentImpl sarg = - (SearchArgumentImpl) ConvertAstToSearchArg.create(getFuncDesc(exprStr)); + (SearchArgumentImpl) ConvertAstToSearchArg.create(conf, getFuncDesc(exprStr)); List leaves = sarg.getLeaves(); assertEquals(1, leaves.size()); @@ -2712,7 +2715,7 @@ public void TestTimestampSarg() throws Exception { "AAABgj0BRVFVQcwBBW9yZy5hcGFjaGUuaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5Q" + "EAAAECAQFib29sZWHu"; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); @@ -2731,7 +2734,7 @@ public void TestDateSarg() throws Exception { "Y2hlLmhhZG9vcC5oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUH" + "MAQVvcmcuYXBhY2hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAgEBYm9vbGVh7g=="; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); @@ -2751,7 +2754,7 @@ public void TestDecimalSarg() throws Exception { "oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQZvcmcuYXBhY2" + "hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABBAEBYm9vbGVh7g=="; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); @@ -2771,7 +2774,7 @@ public void TestCharSarg() throws Exception { "vb3AuaGl2ZS5xbC51ZGYuZ2VuZXJpYy5HZW5lcmljVURGT1BFcXVh7AEAAAGCPQFFUVVBzAEGb3JnLm" + "FwYWNoZS5oYWRvb3AuaW8uQm9vbGVhbldyaXRhYmzlAQAAAQQBAWJvb2xlYe4="; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); @@ -2791,7 +2794,7 @@ public void TestVarcharSarg() throws Exception { "lLmhhZG9vcC5oaXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQ" + "ZvcmcuYXBhY2hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABBAEBYm9vbGVh7g=="; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); @@ -2810,7 +2813,7 @@ public void TestBigintSarg() throws Exception { "dmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewBAAABgj0BRVFVQcwBBW9yZy5hcGFjaGU" + "uaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5QEAAAECAQFib29sZWHu"; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); @@ -2831,7 +2834,7 @@ public void TestBooleanSarg() throws Exception { 
"hlLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAwkBAgEBYrIAAAgBAwkBB29yZy5hcGFjaGUua" + "GFkb29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QQW7kAQEGAQAAAQMJ"; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("(and leaf-0 leaf-1)", sarg.getExpression().toString()); assertEquals(2, sarg.getLeaves().size()); @@ -2853,7 +2856,7 @@ public void TestFloatSarg() throws Exception { "aXZlLnFsLnVkZi5nZW5lcmljLkdlbmVyaWNVREZPUEVxdWHsAQAAAYI9AUVRVUHMAQVvcmcuYXBhY2h" + "lLmhhZG9vcC5pby5Cb29sZWFuV3JpdGFibOUBAAABAgEBYm9vbGVh7g=="; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); @@ -2872,7 +2875,7 @@ public void TestDoubleSarg() throws Exception { "b29wLmhpdmUucWwudWRmLmdlbmVyaWMuR2VuZXJpY1VERk9QRXF1YewBAAABgj0BRVFVQcwBBW9yZy5" + "hcGFjaGUuaGFkb29wLmlvLkJvb2xlYW5Xcml0YWJs5QEAAAECAQFib29sZWHu"; SearchArgument sarg = - new ConvertAstToSearchArg(SerializationUtilities.deserializeExpression(serialAst)) + new ConvertAstToSearchArg(conf, SerializationUtilities.deserializeExpression(serialAst)) .buildSearchArgument(); assertEquals("leaf-0", sarg.getExpression().toString()); assertEquals(1, sarg.getLeaves().size()); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java index 8cbc26d..df42058 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java @@ -79,7 +79,7 @@ public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator, Object literal, List literalList) { return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName, - literal, literalList); + literal, literalList, null); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index 3295372..d689af6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode; import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong; @@ -214,4 +215,12 @@ public void testValidateSMBJoinOperator() { Vectorizer vectorizer = new Vectorizer(); Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } + + @Test + public void testExprNodeDynamicValue() { + ExprNodeDesc exprNode = new ExprNodeDynamicValueDesc(new DynamicValue("id1", TypeInfoFactory.stringTypeInfo)); + Vectorizer v = new Vectorizer(); + Assert.assertTrue(v.validateExprNodeDesc(exprNode, Mode.FILTER)); + Assert.assertTrue(v.validateExprNodeDesc(exprNode, 
Mode.PROJECTION)); + } } diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q new file mode 100644 index 0000000..13797c0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -0,0 +1,68 @@ +set hive.compute.query.using.stats=false; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.tez.dynamic.partition.pruning=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; + +-- Create Tables +create table alltypesorc_int ( cint int, cstring string ) stored as ORC; +create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC; +CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC; + +-- Add Partitions +alter table srcpart_date add partition (ds = "2008-04-08"); +alter table srcpart_date add partition (ds = "2008-04-09"); + +alter table srcpart_small add partition (ds = "2008-04-08"); +alter table srcpart_small add partition (ds = "2008-04-09"); + +-- Load +insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc; +insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; +insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; +insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; +set hive.tez.dynamic.semijoin.reduction=false; + +-- single column, single key +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +set hive.tez.dynamic.semijoin.reduction=true; + +-- Mix dynamic partition pruning(DPP) and min/max bloom filter optimizations. Should pick the DPP. 
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds); +set hive.tez.dynamic.semijoin.reduction=false; + +--multiple sources, single key +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring); +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring); +set hive.tez.dynamic.semijoin.reduction=false; + +-- single source, multiple keys +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1); +set hive.tez.dynamic.semijoin.reduction=true; +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1); +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1); +set hive.tez.dynamic.semijoin.reduction=false; + +-- multiple sources, different keys +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); + +drop table srcpart_date; +drop table srcpart_small; +drop table alltypesorc_int; diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out index 21d82d2..913c25f 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out @@ -203,10 +203,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d1 - filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) + filterExpr: ((label) IN ('foo', 'bar') and id is not null and true) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) + predicate: ((label) IN ('foo', 'bar') and id is not null and true) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: 
COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) @@ -376,10 +376,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d1 - filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) + filterExpr: ((label) IN ('foo', 'bar') and id is not null and true) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) + predicate: ((label) IN ('foo', 'bar') and id is not null and true) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) @@ -519,10 +519,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d1 - filterExpr: id is not null (type: boolean) + filterExpr: (id is not null and true) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: id is not null (type: boolean) + predicate: (id is not null and true) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) @@ -749,10 +749,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d1 - filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) + filterExpr: ((label) IN ('foo', 'bar') and id is not null and true) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) + predicate: ((label) IN ('foo', 'bar') and id is not null and true) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) @@ -904,10 +904,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: dim_shops - filterExpr: ((label = 'foo') and id is not null) (type: boolean) + filterExpr: ((label = 'foo') and id is not null and true) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((label = 'foo') and id is not null) (type: boolean) + predicate: ((label = 'foo') and id is not null and true) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int) @@ -968,10 +968,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: dim_shops - filterExpr: ((label = 'bar') and id is not null) (type: boolean) + filterExpr: ((label = 'bar') and id is not null and true) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((label = 'bar') and id is not null) (type: boolean) + predicate: ((label = 'bar') and id is not null and true) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int) @@ -1155,3 +1155,163 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2000000 +PREHOOK: query: DROP TABLE IF EXISTS src +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src +PREHOOK: Output: default@src +POSTHOOK: query: DROP TABLE IF EXISTS src +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src +PREHOOK: query: DROP TABLE IF EXISTS src1 +PREHOOK: type: DROPTABLE 
+PREHOOK: Input: default@src1 +PREHOOK: Output: default@src1 +POSTHOOK: query: DROP TABLE IF EXISTS src1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src1 +PREHOOK: query: DROP TABLE IF EXISTS src_json +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_json +PREHOOK: Output: default@src_json +POSTHOOK: query: DROP TABLE IF EXISTS src_json +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_json +POSTHOOK: Output: default@src_json +PREHOOK: query: DROP TABLE IF EXISTS src_sequencefile +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_sequencefile +PREHOOK: Output: default@src_sequencefile +POSTHOOK: query: DROP TABLE IF EXISTS src_sequencefile +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_sequencefile +POSTHOOK: Output: default@src_sequencefile +PREHOOK: query: DROP TABLE IF EXISTS src_thrift +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_thrift +PREHOOK: Output: default@src_thrift +POSTHOOK: query: DROP TABLE IF EXISTS src_thrift +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_thrift +POSTHOOK: Output: default@src_thrift +PREHOOK: query: DROP TABLE IF EXISTS srcbucket +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcbucket +PREHOOK: Output: default@srcbucket +POSTHOOK: query: DROP TABLE IF EXISTS srcbucket +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket +POSTHOOK: Output: default@srcbucket +PREHOOK: query: DROP TABLE IF EXISTS srcbucket2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcbucket2 +PREHOOK: Output: default@srcbucket2 +POSTHOOK: query: DROP TABLE IF EXISTS srcbucket2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket2 +POSTHOOK: Output: default@srcbucket2 +PREHOOK: query: DROP TABLE IF EXISTS srcpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart +PREHOOK: Output: default@srcpart +POSTHOOK: query: DROP TABLE IF EXISTS srcpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart +POSTHOOK: Output: default@srcpart +PREHOOK: query: DROP TABLE IF EXISTS primitives +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS primitives +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest4_sequencefile +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest4_sequencefile +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_j1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_j1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_g1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_g1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_g2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_g2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS fetchtask_ioexception +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS fetchtask_ioexception +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS alltypesorc +PREHOOK: 
type: DROPTABLE +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc +POSTHOOK: query: DROP TABLE IF EXISTS alltypesorc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc +PREHOOK: query: DROP TABLE IF EXISTS cbo_t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t1 +PREHOOK: Output: default@cbo_t1 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Output: default@cbo_t1 +PREHOOK: query: DROP TABLE IF EXISTS cbo_t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t2 +PREHOOK: Output: default@cbo_t2 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Output: default@cbo_t2 +PREHOOK: query: DROP TABLE IF EXISTS cbo_t3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t3 +PREHOOK: Output: default@cbo_t3 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t3 +POSTHOOK: Output: default@cbo_t3 +PREHOOK: query: DROP TABLE IF EXISTS src_cbo +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_cbo +PREHOOK: Output: default@src_cbo +POSTHOOK: query: DROP TABLE IF EXISTS src_cbo +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_cbo +POSTHOOK: Output: default@src_cbo +PREHOOK: query: DROP TABLE IF EXISTS part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part +PREHOOK: Output: default@part +POSTHOOK: query: DROP TABLE IF EXISTS part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE IF EXISTS lineitem +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@lineitem +PREHOOK: Output: default@lineitem +POSTHOOK: query: DROP TABLE IF EXISTS lineitem +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@lineitem +POSTHOOK: Output: default@lineitem diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out new file mode 100644 index 0000000..d5b4efa --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -0,0 +1,1551 @@ +PREHOOK: query: -- Create Tables +create table alltypesorc_int ( cint int, cstring string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesorc_int +POSTHOOK: query: -- Create Tables +create table alltypesorc_int ( cint int, cstring string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesorc_int +PREHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_date +PREHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_small +PREHOOK: query: -- Add 
Partitions +alter table srcpart_date add partition (ds = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: -- Add Partitions +alter table srcpart_date add partition (ds = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds=2008-04-08 +PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds=2008-04-09 +PREHOOK: query: -- Load +insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc_int +POSTHOOK: query: -- Load +insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc_int +POSTHOOK: Lineage: alltypesorc_int.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: alltypesorc_int.cstring SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_date@ds=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_date partition (ds = 
"2008-04-09") select key, value from srcpart where ds = "2008-04-09" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_small@ds=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_small@ds=2008-04-09 +POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- single column, single key +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +POSTHOOK: query: -- single column, single key +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +8224 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max)) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND 
DynamicValue(RS_7_srcpart_small_key_max)) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: (key1 is not null and true) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key1 is not null and true) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +8224 +PREHOOK: query: -- Mix dynamic partition pruning(DPP) and min/max bloom filter optimizations. Should pick the DPP. +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds) +PREHOOK: type: QUERY +POSTHOOK: query: -- Mix dynamic partition pruning(DPP) and min/max bloom filter optimizations. Should pick the DPP. +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and true) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and true) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart_small + Partition key expr: ds + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Target Vertex: Map 4 + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 360000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +0 +PREHOOK: query: --multiple sources, single key +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: --multiple sources, single key +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data 
size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 27033 Data size: 2038454 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) 
+PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +48 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Map 8 <- Reducer 4 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and true) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and true) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: 
srcpart_small + filterExpr: (key1 is not null and true and true) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key1 is not null and true and true) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 8 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_9_srcpart_date_cstring_min) AND DynamicValue(RS_9_srcpart_date_cstring_max) and cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max)) (type: boolean) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_9_srcpart_date_cstring_min) AND DynamicValue(RS_9_srcpart_date_cstring_max) and cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max)) (type: boolean) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 27033 Data size: 2038454 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +48 +PREHOOK: query: -- single source, multiple keys +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +PREHOOK: type: QUERY 
+POSTHOOK: query: -- single source, multiple keys +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: (key1 is not null and value1 is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key1 is not null and value1 is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string), value1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +8224 +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +8224 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and value is not null and key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and value BETWEEN DynamicValue(RS_7_srcpart_small_value_min) AND DynamicValue(RS_7_srcpart_small_value_max)) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null and key BETWEEN 
DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and value BETWEEN DynamicValue(RS_7_srcpart_small_value_min) AND DynamicValue(RS_7_srcpart_small_value_max)) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: (key1 is not null and value1 is not null and true and true) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key1 is not null and value1 is not null and true and true) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string), value1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + 
aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- multiple sources, different keys +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: -- multiple sources, different keys +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 13516 Data size: 1019227 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 
+#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +0 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and value is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and true) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and true) (type: boolean) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: (key1 is not null and true) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key1 is not null and true) (type: boolean) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + 
Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 8 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max)) (type: boolean) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max)) (type: boolean) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 13516 Data size: 1019227 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +0 +PREHOOK: query: drop table srcpart_date +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_date +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: drop table srcpart_date +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Output: default@srcpart_date +PREHOOK: query: drop table srcpart_small +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_small +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: drop table srcpart_small +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Output: default@srcpart_small +PREHOOK: query: drop table alltypesorc_int +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Output: default@alltypesorc_int +POSTHOOK: query: drop table alltypesorc_int +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Output: default@alltypesorc_int diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/LiteralDelegate.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/LiteralDelegate.java new file mode 100644 index 0000000..bd8a5ce --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/LiteralDelegate.java @@ -0,0 +1,31 
@@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +import org.apache.hadoop.conf.Configurable; + +/** + * Interface to retrieve a literal value + */ +public interface LiteralDelegate extends Configurable { + + Object getLiteral(); + + String getId(); +} diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java index 8fda95c..3c10c83 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java @@ -18,13 +18,20 @@ package org.apache.hadoop.hive.ql.io.sarg; +import org.apache.hadoop.conf.Configuration; + /** * A factory for creating SearchArguments, as well as modifying those created by this factory. */ public class SearchArgumentFactory { public static SearchArgument.Builder newBuilder() { - return new SearchArgumentImpl.BuilderImpl(); + return newBuilder(null); + } + + public static SearchArgument.Builder newBuilder(Configuration conf) { + return new SearchArgumentImpl.BuilderImpl(conf); } + public static void setPredicateLeafColumn(PredicateLeaf leaf, String newName) { SearchArgumentImpl.PredicateLeafImpl.setColumnName(leaf, newName); } diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index 10d8c51..871bcda 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -31,6 +31,8 @@ import java.util.Queue; import java.util.Set; +import org.apache.hadoop.conf.Configuration; + /** * The implementation of SearchArguments. Visible for testing only. */ @@ -57,27 +59,17 @@ public PredicateLeafImpl(Operator operator, Type type, String columnName, Object literal, - List literalList) { + List literalList, Configuration conf) { this.operator = operator; this.type = type; this.columnName = columnName; this.literal = literal; - if (literal != null) { - if (literal.getClass() != type.getValueClass()) { - throw new IllegalArgumentException("Wrong value class " + - literal.getClass().getName() + " for " + type + "." + operator + - " leaf"); - } - } + checkLiteralType(literal, type, conf); this.literalList = literalList; if (literalList != null) { Class valueCls = type.getValueClass(); for(Object lit: literalList) { - if (lit != null && lit.getClass() != valueCls) { - throw new IllegalArgumentException("Wrong value class item " + - lit.getClass().getName() + " for " + type + "." 
+ operator + - " leaf"); - } + checkLiteralType(lit, type, conf); } } } @@ -99,6 +91,10 @@ public String getColumnName() { @Override public Object getLiteral() { + if (literal instanceof LiteralDelegate) { + return ((LiteralDelegate) literal).getLiteral(); + } + // To get around a kryo 2.22 bug while deserialize a Timestamp into Date // (https://github.com/EsotericSoftware/kryo/issues/88) // When we see a Date, convert back into Timestamp @@ -110,6 +106,13 @@ public Object getLiteral() { @Override public List getLiteralList() { + if (literalList != null && literalList.size() > 0 && literalList.get(0) instanceof LiteralDelegate) { + List newLiteralList = new ArrayList(); + for (Object literalObj : literalList) { + newLiteralList.add(((LiteralDelegate) literalObj).getLiteral()); + } + return newLiteralList; + } return literalList; } @@ -169,6 +172,23 @@ public static void setColumnName(PredicateLeaf leaf, String newName) { assert leaf instanceof PredicateLeafImpl; ((PredicateLeafImpl)leaf).columnName = newName; } + + protected void checkLiteralType(Object literal, Type type, Configuration conf) { + if (literal == null) { + return; + } + + if (literal instanceof LiteralDelegate) { + // Skip the type check; the delegate's value is not known until read time. Optionally, LiteralDelegate could expose getLiteralClass() for validation. + ((LiteralDelegate) literal).setConf(conf); + } else { + if (literal.getClass() != type.getValueClass()) { + throw new IllegalArgumentException("Wrong value class " + + literal.getClass().getName() + " for " + type + "." + operator + + " leaf"); + } + } + } } private final List leaves; @@ -218,6 +238,11 @@ public String toString() { static class BuilderImpl implements Builder { + Configuration conf; + public BuilderImpl(Configuration conf) { + this.conf = conf; + } + // max threshold for CNF conversion. having >8 elements in andList will be // converted to maybe private static final int CNF_COMBINATIONS_THRESHOLD = 256; @@ -291,7 +316,7 @@ public Builder lessThan(String column, PredicateLeaf.Type type, } else { PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN, - type, column, literal, null); + type, column, literal, null, conf); parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } return this; @@ -306,7 +331,7 @@ public Builder lessThanEquals(String column, PredicateLeaf.Type type, } else { PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS, - type, column, literal, null); + type, column, literal, null, conf); parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } return this; @@ -321,7 +346,7 @@ public Builder equals(String column, PredicateLeaf.Type type, } else { PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS, - type, column, literal, null); + type, column, literal, null, conf); parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } return this; @@ -336,7 +361,7 @@ public Builder nullSafeEquals(String column, PredicateLeaf.Type type, } else { PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - type, column, literal, null); + type, column, literal, null, conf); parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } return this; @@ -358,7 +383,7 @@ public Builder in(String column, PredicateLeaf.Type type, PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.IN, - type, column, null, argList); + type, column, null, argList, conf); parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } return this; @@ -372,7 +397,7 @@ public Builder isNull(String column, PredicateLeaf.Type type)
{ } else { PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL, - type, column, null, null); + type, column, null, null, conf); parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } return this; @@ -390,7 +415,7 @@ public Builder between(String column, PredicateLeaf.Type type, Object lower, argList.add(upper); PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN, - type, column, null, argList); + type, column, null, argList, conf); parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } return this;
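
A usage sketch for the new storage-api hooks (illustrative only, not part of the patch): SearchArgumentFactory.newBuilder(Configuration) lets a SearchArgument carry literals that implement LiteralDelegate, PredicateLeafImpl.checkLiteralType() hands those delegates the Configuration instead of type-checking them, and getLiteral()/getLiteralList() resolve them only when the predicate is evaluated. The class ConfBackedLiteral and the "example.key.*" settings below are hypothetical, invented just to show the wiring; in the full patch the DynamicValue expressions seen in the EXPLAIN output above appear to play this role.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate;
    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

    /** Hypothetical delegate: resolves its literal from the Configuration at read time. */
    public class ConfBackedLiteral implements LiteralDelegate {
      private Configuration conf;
      private final String id;   // config key that will hold the runtime value

      public ConfBackedLiteral(String id) {
        this.id = id;
      }

      @Override
      public void setConf(Configuration conf) {
        // PredicateLeafImpl.checkLiteralType() calls this with the builder's Configuration.
        this.conf = conf;
      }

      @Override
      public Configuration getConf() {
        return conf;
      }

      @Override
      public Object getLiteral() {
        // Resolved lazily by PredicateLeafImpl.getLiteral()/getLiteralList().
        return conf == null ? null : conf.get(id);
      }

      @Override
      public String getId() {
        return id;
      }

      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // In real use these values would only become available at run time,
        // e.g. min/max bounds computed by another vertex.
        conf.set("example.key.min", "103");
        conf.set("example.key.max", "498");

        SearchArgument sarg = SearchArgumentFactory.newBuilder(conf)
            .startAnd()
            .between("key", PredicateLeaf.Type.STRING,
                new ConfBackedLiteral("example.key.min"),
                new ConfBackedLiteral("example.key.max"))
            .end()
            .build();
        System.out.println(sarg);
      }
    }

Because the delegates are resolved at evaluation time, the BETWEEN bounds can be produced after the SearchArgument has been built, which is what the broadcast min/max reducers feeding Map 1 in the plans above provide.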