diff --git a/hbase-handler/src/test/results/positive/hbase_custom_key2.q.out b/hbase-handler/src/test/results/positive/hbase_custom_key2.q.out index 2077727d4e..04b67c3732 100644 --- a/hbase-handler/src/test/results/positive/hbase_custom_key2.q.out +++ b/hbase-handler/src/test/results/positive/hbase_custom_key2.q.out @@ -111,10 +111,10 @@ STAGE PLANS: Processor Tree: TableScan alias: hbase_ck_4 - filterExpr: ((key.col1 < '27') and (key.col1 >= '165')) (type: boolean) + filterExpr: ((key.col1 >= '165') and (key.col1 < '27')) (type: boolean) Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key.col1 < '27') and (key.col1 >= '165')) (type: boolean) + predicate: ((key.col1 >= '165') and (key.col1 < '27')) (type: boolean) Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: struct), value (type: string) diff --git a/hbase-handler/src/test/results/positive/hbase_custom_key3.q.out b/hbase-handler/src/test/results/positive/hbase_custom_key3.q.out index 083f5f0546..66f2a16ecf 100644 --- a/hbase-handler/src/test/results/positive/hbase_custom_key3.q.out +++ b/hbase-handler/src/test/results/positive/hbase_custom_key3.q.out @@ -113,7 +113,7 @@ STAGE PLANS: alias: hbase_ck_5 Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key.col1 < '27') and (key.col1 >= '165')) (type: boolean) + predicate: ((key.col1 >= '165') and (key.col1 < '27')) (type: boolean) Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: struct), value (type: string) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index b0c586201b..76e460ed7a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -1361,6 +1361,13 @@ public static boolean isOpPositive(ExprNodeDesc desc) { return GenericUDFOPPositive.class == getGenericUDFClassFromExprDesc(desc); } + /** + * Returns whether the exprNodeDesc is a node of "negative". + */ + public static boolean isOpNegative(ExprNodeDesc desc) { + return GenericUDFOPNegative.class == getGenericUDFClassFromExprDesc(desc); + } + /** * Returns whether the exprNodeDesc is node of "cast". 
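The hbase_custom_key*.q.out updates above only reflect a different, logically equivalent ordering of the conjuncts emitted by the planner; the functional addition in this hunk is the new FunctionRegistry.isOpNegative helper, mirroring the existing isOpPositive. A minimal, hypothetical usage sketch (the surrounding rewrite is illustrative and not part of this patch):

```java
// Hypothetical caller (not part of this patch): peel off a unary minus
// before inspecting the underlying expression, analogous to how
// isOpPositive is consumed elsewhere in the optimizer.
ExprNodeDesc child = expr;                          // 'expr' is an assumed input
if (FunctionRegistry.isOpNegative(child) && child.getChildren() != null) {
  child = child.getChildren().get(0);               // drop the GenericUDFOPNegative wrapper
}
```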
*/ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java index 8e74f8a8b8..8bd098b3d7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java @@ -65,16 +65,15 @@ public ColumnInfo getColumnInfo(String tabAlias, String alias) { for (ColumnInfo columnInfo: this.signature) { if (columnInfo.getTabAlias() == null) { if (tabAlias == null) { - if(columnInfo.getAlias() != null && alias != null && + if(columnInfo.getAlias() != null && columnInfo.getAlias().equals(alias)) { return columnInfo; } } - } - else { + } else { if (tabAlias != null) { if (columnInfo.getTabAlias().equals(tabAlias) && - columnInfo.getAlias() != null && alias != null && + columnInfo.getAlias() != null && columnInfo.getAlias().equals(alias)) { return columnInfo; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index 3346f416e9..2d3daeb771 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -235,7 +235,6 @@ private static ExprNodeConstantDesc typeCast(ExprNodeDesc desc, TypeInfo ti, boo } public static ExprNodeDesc foldExpr(ExprNodeGenericFuncDesc funcDesc) { - GenericUDF udf = funcDesc.getGenericUDF(); if (!isConstantFoldableUdf(udf, funcDesc.getChildren())) { return funcDesc; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java index e04a2b1b97..8f5f4e9237 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java @@ -111,7 +111,7 @@ protected abstract void generatePredicate(NodeProcessorCtx procCtx, FilterOperat * @throws UDFArgumentException */ protected void addPruningPred(Map opToPrunner, - TableScanOperator top, ExprNodeDesc new_pruner_pred) throws UDFArgumentException { + TableScanOperator top, ExprNodeDesc new_pruner_pred) throws SemanticException { ExprNodeDesc old_pruner_pred = opToPrunner.get(top); ExprNodeDesc pruner_pred = null; if (old_pruner_pred != null) { @@ -139,7 +139,7 @@ protected void addPruningPred(Map opToPrunner, */ protected void addPruningPred(Map> opToPrunner, TableScanOperator top, ExprNodeDesc new_pruner_pred, Partition part) - throws UDFArgumentException { + throws SemanticException { Map oldPartToPruner = opToPrunner.get(top); Map partToPruner = null; ExprNodeDesc pruner_pred = null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 4a18cfef54..55b32c05a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -49,6 +49,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexPatternFieldRef; +import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexTableInputRef; import org.apache.calcite.rex.RexRangeRef; import org.apache.calcite.rex.RexSubQuery; @@ -59,6 +60,8 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; 
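The RowSchema.getColumnInfo change above drops the redundant `alias != null` guards: a well-behaved `equals(null)` returns false rather than throwing, so the remaining `columnInfo.getAlias() != null && columnInfo.getAlias().equals(alias)` test is already null-safe with respect to the argument. A small illustrative check of that reasoning (the values are hypothetical):

```java
// Illustrative only: the removed guard cannot change the lookup's outcome.
String columnAlias = "key";    // columnInfo.getAlias()
String requested = null;       // the 'alias' argument
boolean withGuard = requested != null && columnAlias.equals(requested); // false
boolean withoutGuard = columnAlias.equals(requested);                   // also false, no NPE
```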
import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; @@ -80,8 +83,6 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; @@ -98,9 +99,6 @@ public class HiveCalciteUtil { - private static final Logger LOG = LoggerFactory.getLogger(HiveCalciteUtil.class); - - /** * Get list of virtual columns from the given list of projections. *

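The HiveCalciteUtil hunk that follows adds an isConstant check which, unlike a plain literal test, also accepts ROW, ARRAY and MAP value-constructor calls whose operands are all (recursively) constant. A hedged sketch of the kind of expression it classifies as constant (rexBuilder is an assumed RexBuilder; operator constants come from Calcite's SqlStdOperatorTable):

```java
// Illustrative only: ARRAY[1, 2] is a call to the ARRAY value constructor
// over two literals, so the new isConstant check should return true for it.
RexNode one = rexBuilder.makeExactLiteral(java.math.BigDecimal.ONE);
RexNode two = rexBuilder.makeExactLiteral(java.math.BigDecimal.valueOf(2));
RexNode arrayOfLiterals = rexBuilder.makeCall(
    SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, one, two);
boolean constant = HiveCalciteUtil.isConstant(arrayOfLiterals); // expected: true
```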
@@ -1062,6 +1060,25 @@ public RexNode apply(RelDataTypeField input) { return HiveProject.create(input, copyInputRefs, null); } + public static boolean isConstant(RexNode expr) { + if (expr instanceof RexCall) { + RexCall call = (RexCall) expr; + if (call.getOperator() == SqlStdOperatorTable.ROW || + call.getOperator() == SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR || + call.getOperator() == SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR) { + // We check all operands + for (RexNode node : ((RexCall) expr).getOperands()) { + if (!isConstant(node)) { + return false; + } + } + // All literals + return true; + } + } + return expr.isA(SqlKind.LITERAL); + } + /** * Walks over an expression and determines whether it is constant. */ @@ -1157,4 +1174,112 @@ public Void visitInputRef(RexInputRef inputRef) { return inputRefSet; } } + + /** Fixes up the type of all {@link RexInputRef}s in an + * expression to match differences in nullability. + * + *

Throws if there are any greater inconsistencies of type. */
+  public static List<RexNode> fixUp(final RexBuilder rexBuilder,
+      List<RexNode> nodes, final List<RelDataType> fieldTypes) {
+    return new FixNullabilityShuttle(rexBuilder, fieldTypes).apply(nodes);
+  }
+
+  /** Fixes up the type of all {@link RexInputRef}s in an
+   * expression to match differences in nullability.
+   *
+   *

Throws if there any greater inconsistencies of type. */ + public static RexNode fixUp(final RexBuilder rexBuilder, + RexNode node, final List fieldTypes) { + return new FixNullabilityShuttle(rexBuilder, fieldTypes).apply(node); + } + + /** Shuttle that fixes up an expression to match changes in nullability of + * input fields. */ + public static class FixNullabilityShuttle extends RexShuttle { + private final List typeList; + private final RexBuilder rexBuilder; + + public FixNullabilityShuttle(RexBuilder rexBuilder, + List typeList) { + this.typeList = typeList; + this.rexBuilder = rexBuilder; + } + + @Override public RexNode visitInputRef(RexInputRef ref) { + final RelDataType rightType = typeList.get(ref.getIndex()); + final RelDataType refType = ref.getType(); + if (refType == rightType) { + return ref; + } + final RelDataType refType2 = + rexBuilder.getTypeFactory().createTypeWithNullability(refType, + rightType.isNullable()); + // This is a validation check which can become quite handy debugging type + // issues. Basically, we need both types to be equal, only difference should + // be nullability. + // However, we make an exception for Hive wrt CHAR type because Hive encodes + // the STRING type for literals within CHAR value (see {@link HiveNlsString}) + // while Calcite always considers these literals to be a CHAR, which means + // that the reference may be created as a STRING or VARCHAR from AST node + // at parsing time but the actual type referenced is a CHAR. + if (refType2 == rightType) { + return new RexInputRef(ref.getIndex(), refType2); + } else if (refType2.getFamily() == SqlTypeFamily.CHARACTER && + rightType.getSqlTypeName() == SqlTypeName.CHAR && !rightType.isNullable()) { + return new RexInputRef(ref.getIndex(), rightType); + } + throw new AssertionError("mismatched type " + ref + " " + rightType); + } + } + + public static List transformIntoOrAndClause(List operands, RexBuilder rexBuilder) { + final List disjuncts = new ArrayList<>(operands.size() - 2); + if (operands.get(0).getKind() != SqlKind.ROW) { + final RexNode columnExpression = operands.get(0); + if (!isDeterministic(columnExpression)) { + // Bail out + return null; + } + for (int i = 1; i < operands.size(); i++) { + final RexNode valueExpression = operands.get(i); + if (!isDeterministic(valueExpression)) { + // Bail out + return null; + } + disjuncts.add(rexBuilder.makeCall( + SqlStdOperatorTable.EQUALS, + columnExpression, + valueExpression)); + } + } else { + final RexCall columnExpressions = (RexCall) operands.get(0); + if (!isDeterministic(columnExpressions)) { + // Bail out + return null; + } + for (int i = 1; i < operands.size(); i++) { + List conjuncts = new ArrayList<>(columnExpressions.getOperands().size() - 1); + RexCall valueExpressions = (RexCall) operands.get(i); + if (!isDeterministic(valueExpressions)) { + // Bail out + return null; + } + for (int j = 0; j < columnExpressions.getOperands().size(); j++) { + conjuncts.add(rexBuilder.makeCall( + SqlStdOperatorTable.EQUALS, + columnExpressions.getOperands().get(j), + valueExpressions.getOperands().get(j))); + } + if (conjuncts.size() > 1) { + disjuncts.add(rexBuilder.makeCall( + SqlStdOperatorTable.AND, + conjuncts)); + } else { + disjuncts.add(conjuncts.get(0)); + } + } + } + return disjuncts; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java index 1dede0f88a..08b4e8db4b 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java @@ -15,44 +15,43 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.hive.ql.optimizer.calcite; import java.util.HashSet; import java.util.List; -import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexExecutorImpl; import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - +/** + * Executor for {@link RexNode} based on Hive semantics. + */ public class HiveRexExecutorImpl extends RexExecutorImpl { private static final Logger LOG = LoggerFactory.getLogger(HiveRexExecutorImpl.class); - private final RelOptCluster cluster; - public HiveRexExecutorImpl(RelOptCluster cluster) { + public HiveRexExecutorImpl() { super(null); - this.cluster = cluster; } @Override public void reduce(RexBuilder rexBuilder, List constExps, List reducedValues) { - RexNodeConverter rexNodeConverter = new RexNodeConverter(cluster); + RexNodeConverter rexNodeConverter = new RexNodeConverter(rexBuilder, rexBuilder.getTypeFactory()); for (RexNode rexNode : constExps) { // initialize the converter ExprNodeConverter converter = new ExprNodeConverter("", null, null, null, - new HashSet(), cluster.getTypeFactory()); + new HashSet<>(), rexBuilder.getTypeFactory()); // convert RexNode to ExprNodeGenericFuncDesc ExprNodeDesc expr = rexNode.accept(converter); if (expr instanceof ExprNodeGenericFuncDesc) { @@ -60,20 +59,27 @@ public void reduce(RexBuilder rexBuilder, List constExps, List ExprNodeDesc constant = ConstantPropagateProcFactory .foldExpr((ExprNodeGenericFuncDesc) expr); if (constant != null) { - try { - // convert constant back to RexNode - reducedValues.add(rexNodeConverter.convert(constant)); - } catch (Exception e) { - LOG.warn(e.getMessage()); - reducedValues.add(rexNode); - } + addExpressionToList(constant, rexNode, rexNodeConverter, reducedValues); } else { reducedValues.add(rexNode); } + } else if (expr instanceof ExprNodeConstantDesc) { + addExpressionToList(expr, rexNode, rexNodeConverter, reducedValues); } else { reducedValues.add(rexNode); } } } + private void addExpressionToList(ExprNodeDesc reducedExpr, RexNode originalExpr, + RexNodeConverter rexNodeConverter, List reducedValues) { + try { + // convert constant back to RexNode + reducedValues.add(rexNodeConverter.convert(reducedExpr)); + } catch (Exception e) { + LOG.warn(e.getMessage()); + reducedValues.add(originalExpr); + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java index 4a5c9cc567..5e301fdbcb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java @@ -78,6 +78,7 @@ public int getDefaultPrecision(SqlTypeName typeName) { // Binary doesn't 
need any sizes; Decimal has the default of 10. case BINARY: case VARBINARY: + return RelDataType.PRECISION_NOT_SPECIFIED; case TIME: case TIMESTAMP: case TIMESTAMP_WITH_LOCAL_TIME_ZONE: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRexExprList.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRexExprList.java new file mode 100644 index 0000000000..707bcca0b5 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRexExprList.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBiVisitor; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexVisitor; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; + +/** + * This class represents the equivalent to {@link ExprNodeColumnListDesc} + * in a Calcite expression. It is not supposed to be used through planning + * and should be immediately expanded after it has been generated by + * the parser. + */ +public class HiveRexExprList extends RexNode { + + final List expressions = new ArrayList<>(); + + public void addExpression(RexNode expression) { + expressions.add(expression); + } + + public List getExpressions() { + return expressions; + } + + @Override + public RelDataType getType() { + throw new UnsupportedOperationException(); + } + + @Override + public R accept(RexVisitor visitor) { + throw new UnsupportedOperationException(); + } + + @Override + public R accept(RexBiVisitor visitor, P arg) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof HiveRexExprList) { + return this.expressions.equals(((HiveRexExprList) obj).expressions); + } + return false; + } + + @Override + public int hashCode() { + return expressions.hashCode(); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveToUnixTimestampSqlOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveToUnixTimestampSqlOperator.java new file mode 100644 index 0000000000..b5ab01c9ad --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveToUnixTimestampSqlOperator.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
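Looking back a couple of files, the HiveRexExecutorImpl change removes the RelOptCluster dependency: reduce() now builds both converters directly from the RexBuilder it is handed, folds each expression through ConstantPropagateProcFactory, and also accepts results that are already ExprNodeConstantDesc. A condensed sketch of that round trip under the new constructor (constExp and reducedValues are assumed loop variables; imports are those already present in the file):

```java
// Condensed, illustrative sketch of the per-expression folding round trip.
RexNodeConverter back = new RexNodeConverter(rexBuilder, rexBuilder.getTypeFactory());
ExprNodeConverter toHive = new ExprNodeConverter("", null, null, null,
    new HashSet<>(), rexBuilder.getTypeFactory());
ExprNodeDesc hiveExpr = constExp.accept(toHive);                 // RexNode -> ExprNodeDesc
ExprNodeDesc folded = hiveExpr instanceof ExprNodeGenericFuncDesc
    ? ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc) hiveExpr)
    : hiveExpr;
try {
  reducedValues.add(folded != null ? back.convert(folded) : constExp);
} catch (Exception e) {
  reducedValues.add(constExp);                                   // keep the original on failure
}
```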
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.sql.fun.SqlAbstractTimeFunction; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * Sql UNIX_TIMESTAMP calcite operator. + */ +public class HiveToUnixTimestampSqlOperator extends SqlAbstractTimeFunction { + public static final HiveToUnixTimestampSqlOperator INSTANCE = new HiveToUnixTimestampSqlOperator(); + protected HiveToUnixTimestampSqlOperator() { + super("UNIX_TIMESTAMP", SqlTypeName.BIGINT); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java index 3b3557063e..d8eb148f6b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java @@ -33,6 +33,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.util.Pair; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.FilterSelectivityEstimator; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSize; @@ -223,7 +224,13 @@ public Double visitCall(RexCall call) { return null; } cost += operandCost; - Double size = HiveRelMdSize.averageTypeSize(operand.getType()); + Double size; + if (operand.isA(SqlKind.LITERAL)) { + size = HiveRelMdSize.typeSize(operand.getType(), + ((RexLiteral) operand).getValueAs(Comparable.class)); + } else { + size = HiveRelMdSize.averageTypeSize(operand.getType()); + } if (size == null) { return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java index 05c7368c42..c29a287105 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java @@ -39,6 +39,7 @@ import org.apache.calcite.util.mapping.Mapping; import org.apache.calcite.util.mapping.MappingType; import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo; @@ -222,7 +223,7 @@ public void onMatch(RelOptRuleCall call) { .collect(Collectors.toList()); } // Fix nullability in references to the input node - topProjExprs = RexUtil.fixUp(rexBuilder, topProjExprs, RelOptUtil.getFieldTypeList(fkInput.getRowType())); + topProjExprs = HiveCalciteUtil.fixUp(rexBuilder, 
topProjExprs, RelOptUtil.getFieldTypeList(fkInput.getRowType())); // Trigger transformation if (nullableNodes.isEmpty()) { call.transformTo(call.builder() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCExpandExpressionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCExpandExpressionsRule.java index 1c8a100219..aa23cf9a6b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCExpandExpressionsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCExpandExpressionsRule.java @@ -35,6 +35,7 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -196,24 +197,11 @@ private RexNode transformIntoOrAndClause(RexBuilder rexBuilder, RexCall expressi return expression; } - final List disjuncts = new ArrayList<>(expression.getOperands().size() - 2); - final RexCall columnExpressions = (RexCall) expression.getOperands().get(0); - for (int i = 1; i < expression.getOperands().size(); i++) { - List conjuncts = new ArrayList<>(columnExpressions.getOperands().size() - 1); - RexCall valueExpressions = (RexCall) expression.getOperands().get(i); - for (int j = 0; j < columnExpressions.getOperands().size(); j++) { - conjuncts.add(rexBuilder.makeCall( - SqlStdOperatorTable.EQUALS, - columnExpressions.getOperands().get(j), - valueExpressions.getOperands().get(j))); - } - if (conjuncts.size() > 1) { - disjuncts.add(rexBuilder.makeCall( - SqlStdOperatorTable.AND, - conjuncts)); - } else { - disjuncts.add(conjuncts.get(0)); - } + final List disjuncts = HiveCalciteUtil.transformIntoOrAndClause( + expression.getOperands(), rexBuilder); + if (disjuncts == null) { + // We could not execute transformation, return expression + return expression; } if (disjuncts.size() > 1) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java index eebeb4c67e..83b867d3b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java @@ -19,6 +19,7 @@ import java.util.List; +import org.apache.calcite.avatica.util.ByteString; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdSize; @@ -28,6 +29,7 @@ import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableNullableList; +import org.apache.calcite.util.NlsString; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; @@ -183,4 +185,50 @@ public static Double averageTypeSize(RelDataType type) { } } + public static double typeSize(RelDataType type, Comparable value) { + if (value == null) { + return 1d; + } + switch (type.getSqlTypeName()) { + case BOOLEAN: + case TINYINT: + return 1d; + case SMALLINT: + return 2d; + case INTEGER: + case FLOAT: + case REAL: + case DATE: + case TIME: + case TIME_WITH_LOCAL_TIME_ZONE: + case INTERVAL_YEAR: + case INTERVAL_YEAR_MONTH: + case 
INTERVAL_MONTH: + return 4d; + case BIGINT: + case DOUBLE: + case TIMESTAMP: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + return 8d; + case BINARY: + case VARBINARY: + return ((ByteString) value).length(); + case CHAR: + case VARCHAR: + return ((NlsString) value).getValue().length() * BYTES_PER_CHARACTER; + default: + return 32; + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index cf104af00b..c6de339fd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Set; +import org.apache.calcite.avatica.util.ByteString; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -58,7 +59,6 @@ import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.RexVisitor; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.Schema; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter.HiveNlsString; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order; @@ -73,6 +73,7 @@ import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory.HiveNlsString; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -227,160 +228,165 @@ public ExprNodeDesc visitCall(RexCall call) { @Override public ExprNodeDesc visitLiteral(RexLiteral literal) { + return toExprNodeConstantDesc(literal); + } + + public static ExprNodeConstantDesc toExprNodeConstantDesc(RexLiteral literal) { RelDataType lType = literal.getType(); if (RexLiteral.value(literal) == null) { switch (literal.getType().getSqlTypeName()) { - case BOOLEAN: - return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, null); - case TINYINT: - return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, null); - case SMALLINT: - return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, null); - case INTEGER: - return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, null); - case BIGINT: - return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, null); - case FLOAT: - case REAL: - return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, null); - case DOUBLE: - return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, null); - case DATE: - return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, null); - case TIME: - case TIMESTAMP: - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, null); - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - HiveConf 
conf; - try { - conf = Hive.get().getConf(); - } catch (HiveException e) { - throw new RuntimeException(e); - } - return new ExprNodeConstantDesc( - TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), null); - case BINARY: - return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, null); - case DECIMAL: - return new ExprNodeConstantDesc( - TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), lType.getScale()), null); - case VARCHAR: - case CHAR: - return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null); - case INTERVAL_YEAR: - case INTERVAL_MONTH: - case INTERVAL_YEAR_MONTH: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, null); - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, null); - default: - return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, null); + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, null); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, null); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, null); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, null); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, null); + case FLOAT: + case REAL: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, null); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, null); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, null); + case TIME: + case TIMESTAMP: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, null); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + HiveConf conf; + try { + conf = Hive.get().getConf(); + } catch (HiveException e) { + throw new RuntimeException(e); + } + return new ExprNodeConstantDesc( + TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), null); + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, null); + case DECIMAL: + return new ExprNodeConstantDesc( + TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), lType.getScale()), null); + case VARCHAR: + case CHAR: + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null); + case INTERVAL_YEAR: + case INTERVAL_MONTH: + case INTERVAL_YEAR_MONTH: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, null); + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, null); + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, null); } } else { switch (literal.getType().getSqlTypeName()) { - case BOOLEAN: - return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral - .booleanValue(literal))); - case TINYINT: - return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal - .getValue3()).byteValue())); - case SMALLINT: - return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, - 
Short.valueOf(((Number) literal.getValue3()).shortValue())); - case INTEGER: - return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, - Integer.valueOf(((Number) literal.getValue3()).intValue())); - case BIGINT: - return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal - .getValue3()).longValue())); - case FLOAT: - case REAL: - return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, - Float.valueOf(((Number) literal.getValue3()).floatValue())); - case DOUBLE: - return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, - Double.valueOf(((Number) literal.getValue3()).doubleValue())); - case DATE: - return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, - Date.valueOf(literal.getValueAs(DateString.class).toString())); - case TIME: - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, - Timestamp.valueOf(literal.getValueAs(TimeString.class).toString())); - case TIMESTAMP: - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, - Timestamp.valueOf(literal.getValueAs(TimestampString.class).toString())); - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - HiveConf conf; - try { - conf = Hive.get().getConf(); - } catch (HiveException e) { - throw new RuntimeException(e); - } - // Calcite stores timestamp with local time-zone in UTC internally, thus - // when we bring it back, we need to add the UTC suffix. - return new ExprNodeConstantDesc(TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), - TimestampTZUtil.parse(literal.getValueAs(TimestampString.class).toString() + " UTC")); - case BINARY: - return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); - case DECIMAL: - return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), - lType.getScale()), HiveDecimal.create((BigDecimal)literal.getValue3())); - case VARCHAR: - case CHAR: { - if (literal.getValue() instanceof HiveNlsString) { - HiveNlsString mxNlsString = (HiveNlsString) literal.getValue(); - switch (mxNlsString.interpretation) { - case STRING: - return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, literal.getValue3()); - case CHAR: { - int precision = lType.getPrecision(); - HiveChar value = new HiveChar((String) literal.getValue3(), precision); - return new ExprNodeConstantDesc(new CharTypeInfo(precision), value); - } - case VARCHAR: { - int precision = lType.getPrecision(); - HiveVarchar value = new HiveVarchar((String) literal.getValue3(), precision); - return new ExprNodeConstantDesc(new VarcharTypeInfo(precision), value); + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral + .booleanValue(literal))); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal + .getValue3()).byteValue())); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, + Short.valueOf(((Number) literal.getValue3()).shortValue())); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, + Integer.valueOf(((Number) literal.getValue3()).intValue())); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal + .getValue3()).longValue())); + case FLOAT: + case REAL: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, + Float.valueOf(((Number) literal.getValue3()).floatValue())); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, + Double.valueOf(((Number) 
literal.getValue3()).doubleValue())); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, + Date.valueOf(literal.getValueAs(DateString.class).toString())); + case TIME: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, + Timestamp.valueOf(literal.getValueAs(TimeString.class).toString())); + case TIMESTAMP: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, + Timestamp.valueOf(literal.getValueAs(TimestampString.class).toString())); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + HiveConf conf; + try { + conf = Hive.get().getConf(); + } catch (HiveException e) { + throw new RuntimeException(e); } + // Calcite stores timestamp with local time-zone in UTC internally, thus + // when we bring it back, we need to add the UTC suffix. + return new ExprNodeConstantDesc(TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), + TimestampTZUtil.parse(literal.getValueAs(TimestampString.class).toString() + " UTC")); + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, + literal.getValueAs(ByteString.class).getBytes()); + case DECIMAL: + return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), + lType.getScale()), HiveDecimal.create((BigDecimal)literal.getValue3())); + case VARCHAR: + case CHAR: { + if (literal.getValue() instanceof HiveNlsString) { + HiveNlsString mxNlsString = (HiveNlsString) literal.getValue(); + switch (mxNlsString.interpretation) { + case STRING: + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, literal.getValue3()); + case CHAR: { + int precision = lType.getPrecision(); + HiveChar value = new HiveChar((String) literal.getValue3(), precision); + return new ExprNodeConstantDesc(new CharTypeInfo(precision), value); + } + case VARCHAR: { + int precision = lType.getPrecision(); + HiveVarchar value = new HiveVarchar((String) literal.getValue3(), precision); + return new ExprNodeConstantDesc(new VarcharTypeInfo(precision), value); + } + } } + throw new RuntimeException("varchar/string/char values must use HiveNlsString for correctness"); } - throw new RuntimeException("varchar/string/char values must use HiveNlsString for correctness"); - } - case INTERVAL_YEAR: - case INTERVAL_MONTH: - case INTERVAL_YEAR_MONTH: { - BigDecimal monthsBd = (BigDecimal) literal.getValue(); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, - new HiveIntervalYearMonth(monthsBd.intValue())); - } - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: { - BigDecimal millisBd = (BigDecimal) literal.getValue(); - // Calcite literal is in millis, we need to convert to seconds - BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000)); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - new HiveIntervalDayTime(secsBd)); - } - default: - return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); + case INTERVAL_YEAR: + case INTERVAL_MONTH: + case INTERVAL_YEAR_MONTH: { + BigDecimal monthsBd = (BigDecimal) literal.getValue(); + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, + new HiveIntervalYearMonth(monthsBd.intValue())); + } + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case 
INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: { + BigDecimal millisBd = (BigDecimal) literal.getValue(); + // Calcite literal is in millis, we need to convert to seconds + BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000)); + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + new HiveIntervalDayTime(secsBd)); + } + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index ba6eefb2d2..67ba55b2e6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -19,36 +19,27 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.avatica.util.TimeUnitRange; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlBinaryOperator; -import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlCastFunction; import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.fun.SqlQuantifyOperator; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; -import org.apache.calcite.util.ConversionUtil; import org.apache.calcite.util.DateString; -import org.apache.calcite.util.NlsString; import org.apache.calcite.util.TimestampString; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Decimal128; @@ -59,28 +50,21 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZ; -import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter.HiveNlsString.Interpretation; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import 
org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory.HiveNlsString.Interpretation; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; -import org.apache.hadoop.hive.ql.plan.SubqueryType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -110,77 +94,27 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import java.math.BigDecimal; -import java.math.BigInteger; import java.time.Instant; import java.util.ArrayList; import java.util.Calendar; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; +/** + * Class that contains logic to translate Hive expressions ({@link ExprNodeDesc}) + * into Calcite expressions ({@link RexNode}). + */ public class RexNodeConverter { - private static class InputCtx { - private final RelDataType calciteInpDataType; - private final ImmutableMap hiveNameToPosMap; - private final RowResolver hiveRR; - private final int offsetInCalciteSchema; - - private InputCtx(RelDataType calciteInpDataType, ImmutableMap hiveNameToPosMap, - RowResolver hiveRR, int offsetInCalciteSchema) { - this.calciteInpDataType = calciteInpDataType; - this.hiveNameToPosMap = hiveNameToPosMap; - this.hiveRR = hiveRR; - this.offsetInCalciteSchema = offsetInCalciteSchema; - } - }; - - private final RelOptCluster cluster; - private final ImmutableList inputCtxs; - private final boolean flattenExpr; - - //outerRR belongs to outer query and is required to resolve correlated references - private final RowResolver outerRR; - private final ImmutableMap outerNameToPosMap; - private int correlatedId; - private final int maxNodesForInToOrTransformation; - - //Constructor used by HiveRexExecutorImpl - public RexNodeConverter(RelOptCluster cluster) { - this(cluster, new ArrayList(), false); - } + private final RexBuilder rexBuilder; + private final RelDataTypeFactory typeFactory; - //subqueries will need outer query's row resolver - public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, - ImmutableMap outerNameToPosMap, - ImmutableMap nameToPosMap, RowResolver hiveRR, RowResolver outerRR, - int maxNodesForInToOrTransformation, int offset, boolean flattenExpr, int correlatedId) { - this.cluster = cluster; - this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR, offset)); - this.flattenExpr = flattenExpr; - this.outerRR = outerRR; - this.outerNameToPosMap = outerNameToPosMap; - this.correlatedId = correlatedId; - this.maxNodesForInToOrTransformation = maxNodesForInToOrTransformation; - } - public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, - ImmutableMap nameToPosMap, int offset, boolean flattenExpr) { - this.cluster = cluster; - this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset)); - 
this.flattenExpr = flattenExpr; - this.outerRR = null; - this.outerNameToPosMap = null; - this.maxNodesForInToOrTransformation = 0; - } - - public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean flattenExpr) { - this.cluster = cluster; - this.inputCtxs = ImmutableList. builder().addAll(inpCtxLst).build(); - this.flattenExpr = flattenExpr; - this.outerRR = null; - this.outerNameToPosMap = null; - this.maxNodesForInToOrTransformation = 0; + /** + * Constructor used by HiveRexExecutorImpl. + */ + public RexNodeConverter(RexBuilder rexBuilder, RelDataTypeFactory typeFactory) { + this.rexBuilder = rexBuilder; + this.typeFactory = typeFactory; } public RexNode convert(ExprNodeDesc expr) throws SemanticException { @@ -188,128 +122,19 @@ public RexNode convert(ExprNodeDesc expr) throws SemanticException { return convert((ExprNodeGenericFuncDesc) expr); } else if (expr instanceof ExprNodeConstantDesc) { return convert((ExprNodeConstantDesc) expr); - } else if (expr instanceof ExprNodeColumnDesc) { - return convert((ExprNodeColumnDesc) expr); } else if (expr instanceof ExprNodeFieldDesc) { return convert((ExprNodeFieldDesc) expr); - } else if(expr instanceof ExprNodeSubQueryDesc) { - return convert((ExprNodeSubQueryDesc) expr); } else { throw new RuntimeException("Unsupported Expression"); } - // TODO: handle ExprNodeColumnListDesc - } - - private RexNode getSomeSubquery(final RelNode subqueryRel, final RexNode lhs, - final SqlQuantifyOperator quantifyOperator) { - if(quantifyOperator == SqlStdOperatorTable.SOME_EQ) { - return RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); - } else if (quantifyOperator == SqlStdOperatorTable.SOME_NE) { - RexSubQuery subQuery = RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); - return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, subQuery); - } else { - return RexSubQuery.some(subqueryRel, ImmutableList.of(lhs), quantifyOperator); - } - } - - private void throwInvalidSubqueryError(final ASTNode comparisonOp) throws SemanticException { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "Invalid operator:" + comparisonOp.toString())); - } - - // <>ANY and =ALL is not supported - private RexNode convertSubquerySomeAll(final ExprNodeSubQueryDesc subQueryDesc) - throws SemanticException { - assert(subQueryDesc.getType() == SubqueryType.SOME - || subQueryDesc.getType() == SubqueryType.ALL); - - RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); - ASTNode comparisonOp = subQueryDesc.getComparisonOp(); - SqlQuantifyOperator quantifyOperator = null; - - switch (comparisonOp.getType()) { - case HiveParser.EQUAL: - if(subQueryDesc.getType() == SubqueryType.ALL) { - throwInvalidSubqueryError(comparisonOp); - } - quantifyOperator = SqlStdOperatorTable.SOME_EQ; - break; - case HiveParser.LESSTHAN: - quantifyOperator = SqlStdOperatorTable.SOME_LT; - break; - case HiveParser.LESSTHANOREQUALTO: - quantifyOperator = SqlStdOperatorTable.SOME_LE; - break; - case HiveParser.GREATERTHAN: - quantifyOperator = SqlStdOperatorTable.SOME_GT; - break; - case HiveParser.GREATERTHANOREQUALTO: - quantifyOperator = SqlStdOperatorTable.SOME_GE; - break; - case HiveParser.NOTEQUAL: - if(subQueryDesc.getType() == SubqueryType.SOME) { - throwInvalidSubqueryError(comparisonOp); - } - quantifyOperator = SqlStdOperatorTable.SOME_NE; - break; - default: - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "Invalid operator:" + comparisonOp.toString())); - } - - 
if(subQueryDesc.getType() == SubqueryType.ALL) { - quantifyOperator = SqlStdOperatorTable.some(quantifyOperator.comparisonKind.negateNullSafe()); - } - RexNode someQuery = getSomeSubquery(subQueryDesc.getRexSubQuery(), rexNodeLhs, - quantifyOperator); - if(subQueryDesc.getType() == SubqueryType.ALL) { - return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, someQuery); - } - return someQuery; - } - - private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException { - if(subQueryDesc.getType() == SubqueryType.IN) { - /* - * Check.5.h :: For In and Not In the SubQuery must implicitly or - * explicitly only contain one select item. - */ - if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "SubQuery can contain only 1 item in Select List.")); - } - //create RexNode for LHS - RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); - - //create RexSubQuery node - RexNode rexSubQuery = RexSubQuery.in(subQueryDesc.getRexSubQuery(), - ImmutableList.of(rexNodeLhs)); - return rexSubQuery; - } else if(subQueryDesc.getType() == SubqueryType.EXISTS) { - RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery()); - return subQueryNode; - } else if(subQueryDesc.getType() == SubqueryType.SCALAR){ - if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "SubQuery can contain only 1 item in Select List.")); - } - //create RexSubQuery node - RexNode rexSubQuery = RexSubQuery.scalar(subQueryDesc.getRexSubQuery()); - return rexSubQuery; - } else if(subQueryDesc.getType() == SubqueryType.SOME - || subQueryDesc.getType() == SubqueryType.ALL) { - return convertSubquerySomeAll(subQueryDesc); - } else { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "Invalid subquery: " + subQueryDesc.getType())); - } + // TODO: Handle ExprNodeColumnDesc, ExprNodeColumnListDesc } private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException { RexNode rexNode = convert(fieldDesc.getDesc()); if (rexNode.getType().isStruct()) { // regular case of accessing nested field in a column - return cluster.getRexBuilder().makeFieldAccess(rexNode, fieldDesc.getFieldName(), true); + return rexBuilder.makeFieldAccess(rexNode, fieldDesc.getFieldName(), true); } else { // This may happen for schema-less tables, where columns are dynamically // supplied by serdes. @@ -399,32 +224,31 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { isAllPrimitive = isAllPrimitive && tmpExprNode.getTypeInfo().getCategory() == Category.PRIMITIVE; - argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory())); + argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), typeFactory)); tmpRN = convert(tmpExprNode); childRexNodeLst.add(tmpRN); } // See if this is an explicit cast. - RexNode expr = null; - RelDataType retType = null; - expr = handleExplicitCast(func, childRexNodeLst); + RelDataType retType = TypeConverter.convert(func.getTypeInfo(), typeFactory); + RexNode expr = handleExplicitCast(func.getGenericUDF(), retType, childRexNodeLst, + rexBuilder); if (expr == null) { // This is not a cast; process the function. 
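For context on the IN handling in the hunk above: with the per-query maxNodesForInToOrTransformation check removed from this converter, a primitive-typed IN over more than one value is now always rewritten into a disjunction of equalities (the retained comment about the configured threshold is therefore stale), matching the shape of expansion that HiveCalciteUtil.transformIntoOrAndClause performs for the JDBC rule earlier in this patch. A hedged sketch of the scalar case (a, lit1 and lit2 are assumed RexNodes):

```java
// Illustrative only: "a IN (1, 2)" expands to "a = 1 OR a = 2".
RexNode eq1 = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, a, lit1);
RexNode eq2 = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, a, lit2);
RexNode expanded = rexBuilder.makeCall(SqlStdOperatorTable.OR, eq1, eq2);
```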
- retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()); SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType); if (calciteOp.getKind() == SqlKind.CASE) { // If it is a case operator, we need to rewrite it - childRexNodeLst = rewriteCaseChildren(func, childRexNodeLst); + childRexNodeLst = rewriteCaseChildren(func.getFuncText(), childRexNodeLst, rexBuilder); // Adjust branch types by inserting explicit casts if the actual is ambigous - childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType); + childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder); } else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) { // If it is a extract operator, we need to rewrite it - childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst); + childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst, rexBuilder); } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) { // If it is a floor operator, we need to rewrite it - childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst); + childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst, rexBuilder); } else if (calciteOp.getKind() == SqlKind.IN && isAllPrimitive) { if (childRexNodeLst.size() == 2) { // if it is a single item in an IN clause, transform A IN (B) to A = B @@ -437,27 +261,19 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { // except complex types // Rewrite to OR is done only if number of operands are less than // the threshold configured - boolean rewriteToOr = true; - if(this.maxNodesForInToOrTransformation != 0) { - if(childRexNodeLst.size() > this.maxNodesForInToOrTransformation) { - rewriteToOr = false; - } - } - if(rewriteToOr) { - childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst); - calciteOp = SqlStdOperatorTable.OR; - } + childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst, rexBuilder); + calciteOp = SqlStdOperatorTable.OR; } } else if (calciteOp.getKind() == SqlKind.COALESCE && childRexNodeLst.size() > 1) { // Rewrite COALESCE as a CASE // This allows to be further reduced to OR, if possible calciteOp = SqlStdOperatorTable.CASE; - childRexNodeLst = rewriteCoalesceChildren(func, childRexNodeLst); + childRexNodeLst = rewriteCoalesceChildren(childRexNodeLst, rexBuilder); // Adjust branch types by inserting explicit casts if the actual is ambigous - childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType); + childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder); } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { - childRexNodeLst = rewriteToDateChildren(childRexNodeLst); + childRexNodeLst = rewriteToDateChildren(childRexNodeLst, rexBuilder); } else if (calciteOp.getKind() == SqlKind.BETWEEN) { assert childRexNodeLst.get(0).isAlwaysTrue() || childRexNodeLst.get(0).isAlwaysFalse(); boolean invert = childRexNodeLst.get(0).isAlwaysTrue(); @@ -473,29 +289,29 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { RexNode rangeL = childRexNodeLst.get(2); RexNode rangeH = childRexNodeLst.get(3); childRexNodeLst.clear(); - childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, rangeL, op)); - childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, op, rangeH)); + childRexNodeLst.add(rexBuilder.makeCall(cmpOp, rangeL, op)); + childRexNodeLst.add(rexBuilder.makeCall(cmpOp, op, rangeH)); } - expr = 
cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); + expr = rexBuilder.makeCall(retType, calciteOp, childRexNodeLst); } else { retType = expr.getType(); } // TODO: Cast Function in Calcite has a bug where inferring the type on a cast throws // an exception - if (flattenExpr && (expr instanceof RexCall) + if (expr instanceof RexCall && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) { RexCall call = (RexCall) expr; - expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(), + expr = rexBuilder.makeCall(retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator())); } return expr; } - private boolean castExprUsingUDFBridge(GenericUDF gUDF) { + private static boolean castExprUsingUDFBridge(GenericUDF gUDF) { boolean castExpr = false; - if (gUDF != null && gUDF instanceof GenericUDFBridge) { + if (gUDF instanceof GenericUDFBridge) { String udfClassName = ((GenericUDFBridge) gUDF).getUdfClassName(); if (udfClassName != null) { int sp = udfClassName.lastIndexOf('.'); @@ -515,20 +331,17 @@ private boolean castExprUsingUDFBridge(GenericUDF gUDF) { return castExpr; } - private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List childRexNodeLst) - throws CalciteSemanticException { + public static RexNode handleExplicitCast(GenericUDF udf, RelDataType returnType, List childRexNodeLst, + RexBuilder rexBuilder) { RexNode castExpr = null; if (childRexNodeLst != null && childRexNodeLst.size() == 1) { - GenericUDF udf = func.getGenericUDF(); if ((udf instanceof GenericUDFToChar) || (udf instanceof GenericUDFToVarchar) || (udf instanceof GenericUDFToString) || (udf instanceof GenericUDFToDecimal) || (udf instanceof GenericUDFToDate) || (udf instanceof GenericUDFTimestamp) || (udf instanceof GenericUDFToTimestampLocalTZ) || (udf instanceof GenericUDFToBinary) || castExprUsingUDFBridge(udf)) { - castExpr = cluster.getRexBuilder().makeAbstractCast( - TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()), - childRexNodeLst.get(0)); + castExpr = rexBuilder.makeAbstractCast(returnType, childRexNodeLst.get(0)); } } @@ -550,10 +363,10 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c * It will be transformed into: * CASE WHEN =(x + y, 1) THEN 'fee' WHEN =(x + y, 2) THEN 'fie' ELSE null END */ - private List rewriteCaseChildren(ExprNodeGenericFuncDesc func, List childRexNodeLst) - throws SemanticException { - List newChildRexNodeLst = new ArrayList(); - if (FunctionRegistry.getNormalizedFunctionName(func.getFuncText()).equals("case")) { + public static List rewriteCaseChildren(String funcText, List childRexNodeLst, + RexBuilder rexBuilder) throws SemanticException { + List newChildRexNodeLst = new ArrayList<>(); + if (FunctionRegistry.getNormalizedFunctionName(funcText).equals("case")) { RexNode firstPred = childRexNodeLst.get(0); int length = childRexNodeLst.size() % 2 == 1 ?
childRexNodeLst.size() : childRexNodeLst.size() - 1; @@ -561,7 +374,7 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c if (i % 2 == 1) { // We rewrite it newChildRexNodeLst.add( - cluster.getRexBuilder().makeCall( + rexBuilder.makeCall( SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); } else { newChildRexNodeLst.add(childRexNodeLst.get(i)); @@ -576,7 +389,7 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c } // Calcite always needs the else clause to be defined explicitly if (newChildRexNodeLst.size() % 2 == 0) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeNullLiteral( + newChildRexNodeLst.add(rexBuilder.makeNullLiteral( newChildRexNodeLst.get(newChildRexNodeLst.size()-1).getType())); } return newChildRexNodeLst; @@ -588,13 +401,13 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c * Calcite is stricter than Hive w.r.t. type conversions. * If a CASE has branches with string/int/boolean branch types, there is no common type. */ - private List adjustCaseBranchTypes(List nodes, RelDataType retType) { + public static List adjustCaseBranchTypes(List nodes, RelDataType retType, RexBuilder rexBuilder) { List newNodes = new ArrayList<>(); for (int i = 0; i < nodes.size(); i++) { RexNode node = nodes.get(i); if ((i % 2 == 1 || i == nodes.size() - 1) && !node.getType().getSqlTypeName().equals(retType.getSqlTypeName())) { - newNodes.add(cluster.getRexBuilder().makeCast(retType, node)); + newNodes.add(rexBuilder.makeCast(retType, node)); } else { newNodes.add(node); } @@ -602,33 +415,33 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newNodes; } - private List rewriteExtractDateChildren(SqlOperator op, List childRexNodeLst) - throws SemanticException { + public static List rewriteExtractDateChildren(SqlOperator op, List childRexNodeLst, + RexBuilder rexBuilder) { List newChildRexNodeLst = new ArrayList<>(2); final boolean isTimestampLevel; if (op == HiveExtractDate.YEAR) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.YEAR)); isTimestampLevel = false; } else if (op == HiveExtractDate.QUARTER) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.QUARTER)); isTimestampLevel = false; } else if (op == HiveExtractDate.MONTH) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.MONTH)); isTimestampLevel = false; } else if (op == HiveExtractDate.WEEK) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.WEEK)); isTimestampLevel = false; } else if (op == HiveExtractDate.DAY) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.DAY)); isTimestampLevel = false; } else if (op == HiveExtractDate.HOUR) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.HOUR)); isTimestampLevel = true; } else if (op == HiveExtractDate.MINUTE) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.MINUTE)); isTimestampLevel = true; } else if (op == HiveExtractDate.SECOND) { -
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.SECOND)); isTimestampLevel = true; } else { isTimestampLevel = false; @@ -640,76 +453,75 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c } else { // We need to add a cast to DATETIME Family if (isTimestampLevel) { - newChildRexNodeLst.add(makeCast(SqlTypeName.TIMESTAMP, child)); + newChildRexNodeLst.add(makeCast(SqlTypeName.TIMESTAMP, child, rexBuilder)); } else { - newChildRexNodeLst.add(makeCast(SqlTypeName.DATE, child)); + newChildRexNodeLst.add(makeCast(SqlTypeName.DATE, child, rexBuilder)); } } return newChildRexNodeLst; } - private RexNode makeCast(SqlTypeName typeName, final RexNode child) { - RelDataType sqlType = cluster.getTypeFactory().createSqlType(typeName); - RelDataType nullableType = cluster.getTypeFactory().createTypeWithNullability(sqlType, true); - return cluster.getRexBuilder().makeCast(nullableType, child); + private static RexNode makeCast(SqlTypeName typeName, final RexNode child, RexBuilder rexBuilder) { + RelDataType sqlType = rexBuilder.getTypeFactory().createSqlType(typeName); + RelDataType nullableType = rexBuilder.getTypeFactory().createTypeWithNullability(sqlType, true); + return rexBuilder.makeCast(nullableType, child); } - private List rewriteFloorDateChildren(SqlOperator op, List childRexNodeLst) - throws SemanticException { - List newChildRexNodeLst = new ArrayList(); + public static List rewriteFloorDateChildren(SqlOperator op, List childRexNodeLst, + RexBuilder rexBuilder) { + List newChildRexNodeLst = new ArrayList<>(); assert childRexNodeLst.size() == 1; newChildRexNodeLst.add(childRexNodeLst.get(0)); if (op == HiveFloorDate.YEAR) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.YEAR)); } else if (op == HiveFloorDate.QUARTER) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.QUARTER)); } else if (op == HiveFloorDate.MONTH) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.MONTH)); } else if (op == HiveFloorDate.WEEK) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.WEEK)); } else if (op == HiveFloorDate.DAY) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.DAY)); } else if (op == HiveFloorDate.HOUR) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.HOUR)); } else if (op == HiveFloorDate.MINUTE) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.MINUTE)); } else if (op == HiveFloorDate.SECOND) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.SECOND)); } return newChildRexNodeLst; } - private List rewriteToDateChildren(List childRexNodeLst) { - List newChildRexNodeLst = new ArrayList(); + public static List rewriteToDateChildren(List childRexNodeLst, RexBuilder rexBuilder) { + List newChildRexNodeLst = new ArrayList<>(); assert childRexNodeLst.size() == 1; RexNode 
child = childRexNodeLst.get(0); if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval(child.getType())) { newChildRexNodeLst.add(child); } else { - newChildRexNodeLst.add(makeCast(SqlTypeName.TIMESTAMP, child)); + newChildRexNodeLst.add(makeCast(SqlTypeName.TIMESTAMP, child, rexBuilder)); } return newChildRexNodeLst; } - private List rewriteInClauseChildren(SqlOperator op, List childRexNodeLst) - throws SemanticException { + public static List rewriteInClauseChildren(SqlOperator op, List childRexNodeLst, + RexBuilder rexBuilder) throws SemanticException { assert op.getKind() == SqlKind.IN; RexNode firstPred = childRexNodeLst.get(0); List newChildRexNodeLst = new ArrayList(); for (int i = 1; i < childRexNodeLst.size(); i++) { newChildRexNodeLst.add( - cluster.getRexBuilder().makeCall( + rexBuilder.makeCall( SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); } return newChildRexNodeLst; } - private List rewriteCoalesceChildren( - ExprNodeGenericFuncDesc func, List childRexNodeLst) { + public static List rewriteCoalesceChildren( + List childRexNodeLst, RexBuilder rexBuilder) { final List convertedChildList = Lists.newArrayList(); assert childRexNodeLst.size() > 0; - final RexBuilder rexBuilder = cluster.getRexBuilder(); int i=0; for (; i < childRexNodeLst.size()-1; ++i) { // WHEN child not null THEN child @@ -741,77 +553,7 @@ private static boolean checkForStatefulFunctions(List list) { return false; } - private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException { - InputCtx ctxLookingFor = null; - - if (inputCtxs.size() == 1 && inputCtxs.get(0).hiveRR == null) { - ctxLookingFor = inputCtxs.get(0); - } else { - String tableAlias = col.getTabAlias(); - String colAlias = col.getColumn(); - int noInp = 0; - for (InputCtx ic : inputCtxs) { - if (tableAlias == null || ic.hiveRR.hasTableAlias(tableAlias)) { - if (ic.hiveRR.getPosition(colAlias) >= 0) { - ctxLookingFor = ic; - noInp++; - } - } - } - - if (noInp > 1) { - throw new RuntimeException("Ambiguous column mapping"); - } - } - - return ctxLookingFor; - } - - protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException { - //if this is co-rrelated we need to make RexCorrelVariable(with id and type) - // id and type should be retrieved from outerRR - InputCtx ic = getInputCtx(col); - if(ic == null) { - // we have correlated column, build data type from outer rr - RelDataType rowType = TypeConverter.getType(cluster, this.outerRR, null); - if (this.outerNameToPosMap.get(col.getColumn()) == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN_NAME.getMsg(col.getColumn())); - } - - int pos = this.outerNameToPosMap.get(col.getColumn()); - CorrelationId colCorr = new CorrelationId(this.correlatedId); - RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr); - return cluster.getRexBuilder().makeFieldAccess(corExpr, pos); - } - int pos = ic.hiveNameToPosMap.get(col.getColumn()); - return cluster.getRexBuilder().makeInputRef( - ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema); - } - - private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE), - MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE); - - private static NlsString makeHiveUnicodeString(Interpretation interpretation, String text) { - return new HiveNlsString(interpretation, text, ConversionUtil.NATIVE_UTF16_CHARSET_NAME, SqlCollation.IMPLICIT); - } - - static class HiveNlsString extends NlsString { - - enum Interpretation { - CHAR, VARCHAR, 
STRING; - } - - public final Interpretation interpretation; - - public HiveNlsString(Interpretation interpretation, String value, String charsetName, SqlCollation collation) { - super(value, charsetName, collation); - this.interpretation = interpretation; - } - - } - protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException { - final RexBuilder rexBuilder = cluster.getRexBuilder(); final RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory(); final PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo(); final RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory); @@ -844,7 +586,6 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx case LONG: calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value)); break; - // TODO: is Decimal an exact numeric or approximate numeric? case DECIMAL: if (value instanceof HiveDecimal) { value = ((HiveDecimal) value).bigDecimalValue(); @@ -867,8 +608,6 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx // TODO: return createNullLiteral(literal); } BigDecimal bd = (BigDecimal) value; - BigInteger unscaled = bd.unscaledValue(); - int precision = bd.unscaledValue().abs().toString().length(); int scale = bd.scale(); @@ -877,11 +616,11 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx if (precision > scale) { // bd is greater than or equal to 1 relType = - cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL, precision, scale); + typeFactory.createSqlType(SqlTypeName.DECIMAL, precision, scale); } else { // bd is less than 1 relType = - cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL, scale + 1, scale); + typeFactory.createSqlType(SqlTypeName.DECIMAL, scale + 1, scale); } calciteLiteral = rexBuilder.makeExactLiteral(bd, relType); break; @@ -901,16 +640,19 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx if (value instanceof HiveChar) { value = ((HiveChar) value).getValue(); } - calciteLiteral = rexBuilder.makeCharLiteral(makeHiveUnicodeString(Interpretation.CHAR, (String) value)); + calciteLiteral = rexBuilder.makeCharLiteral( + RexNodeExprFactory.makeHiveUnicodeString(Interpretation.CHAR, (String) value)); break; case VARCHAR: if (value instanceof HiveVarchar) { value = ((HiveVarchar) value).getValue(); } - calciteLiteral = rexBuilder.makeCharLiteral(makeHiveUnicodeString(Interpretation.VARCHAR, (String) value)); + calciteLiteral = rexBuilder.makeCharLiteral( + RexNodeExprFactory.makeHiveUnicodeString(Interpretation.VARCHAR, (String) value)); break; case STRING: - calciteLiteral = rexBuilder.makeCharLiteral(makeHiveUnicodeString(Interpretation.STRING, (String) value)); + calciteLiteral = rexBuilder.makeCharLiteral( + RexNodeExprFactory.makeHiveUnicodeString(Interpretation.STRING, (String) value)); break; case DATE: final Date date = (Date) value; @@ -936,14 +678,10 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx break; case TIMESTAMPLOCALTZ: final TimestampString tsLocalTZString; - if (value == null) { - tsLocalTZString = null; - } else { - Instant i = ((TimestampTZ)value).getZonedDateTime().toInstant(); - tsLocalTZString = TimestampString - .fromMillisSinceEpoch(i.toEpochMilli()) - .withNanos(i.getNano()); - } + Instant i = ((TimestampTZ)value).getZonedDateTime().toInstant(); + tsLocalTZString = TimestampString + .fromMillisSinceEpoch(i.toEpochMilli()) + .withNanos(i.getNano()); calciteLiteral = 
rexBuilder.makeTimestampWithLocalTimeZoneLiteral( tsLocalTZString, rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE)); @@ -973,25 +711,10 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx case BINARY: case UNKNOWN: default: - throw new RuntimeException("UnSupported Literal"); + throw new RuntimeException("Unsupported Literal"); } return calciteLiteral; } - public static RexNode convert(RelOptCluster cluster, ExprNodeDesc joinCondnExprNode, - List inputRels, LinkedHashMap relToHiveRR, - Map> relToHiveColNameCalcitePosMap, boolean flattenExpr) - throws SemanticException { - List inputCtxLst = new ArrayList(); - - int offSet = 0; - for (RelNode r : inputRels) { - inputCtxLst.add(new InputCtx(r.getRowType(), relToHiveColNameCalcitePosMap.get(r), relToHiveRR - .get(r), offSet)); - offSet += r.getRowType().getFieldCount(); - } - - return (new RexNodeConverter(cluster, inputCtxLst, flattenExpr)).convert(joinCondnExprNode); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 07ca87fda0..9819f4a82d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSqlFunction; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTruncSqlOperator; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToUnixTimestampSqlOperator; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnixTimestampSqlOperator; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -243,6 +244,8 @@ public static ASTNode buildAST(SqlOperator op, List children) { case IN: case BETWEEN: case ROW: + case ARRAY_VALUE_CONSTRUCTOR: + case MAP_VALUE_CONSTRUCTOR: case IS_NOT_TRUE: case IS_TRUE: case IS_NOT_FALSE: @@ -381,6 +384,8 @@ private static String getName(GenericUDF hiveUDF) { registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in")); registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between")); registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); + registerFunction("array", SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, hToken(HiveParser.Identifier, "array")); + registerFunction("map", SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR, hToken(HiveParser.Identifier, "map")); registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.Identifier, "isnotnull")); registerFunction("isnull", SqlStdOperatorTable.IS_NULL, hToken(HiveParser.Identifier, "isnull")); registerFunction("isnottrue", SqlStdOperatorTable.IS_NOT_TRUE, hToken(HiveParser.Identifier, "isnottrue")); @@ -464,12 +469,12 @@ private static String getName(GenericUDF hiveUDF) { ); registerFunction("trunc", HiveTruncSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "trunc")); registerFunction("to_date", HiveToDateSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "to_date")); - registerFunction("to_unix_timestamp", HiveUnixTimestampSqlOperator.INSTANCE, - hToken(HiveParser.Identifier, "to_unix_timestamp") - ); + 
registerFunction("to_unix_timestamp", HiveToUnixTimestampSqlOperator.INSTANCE, + hToken(HiveParser.Identifier, "to_unix_timestamp")); + registerFunction("unix_timestamp", HiveUnixTimestampSqlOperator.INSTANCE, + hToken(HiveParser.Identifier, "unix_timestamp")); registerFunction("from_unixtime", HiveFromUnixTimeSqlOperator.INSTANCE, - hToken(HiveParser.Identifier, "from_unixtime") - ); + hToken(HiveParser.Identifier, "from_unixtime")); registerFunction("date_add", HiveDateAddSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "date_add")); registerFunction("date_sub", HiveDateSubSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "date_sub")); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java index ed4a73e9f6..fc019a7338 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java @@ -29,6 +29,7 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.parser.SqlParserPos; @@ -46,6 +47,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory.HiveNlsString; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -140,7 +142,7 @@ public static RelDataType getType(RelOptCluster cluster, RowResolver rr, } public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory) - throws CalciteSemanticException{ + throws CalciteSemanticException { RelDataType convertedType = null; switch (type.getCategory()) { @@ -272,6 +274,35 @@ public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dt throw new CalciteSemanticException("Union type is not supported", UnsupportedFeature.Union_type); } + /** + * This method exists because type information for CHAR literals + * is encoded within the literal value itself. The reason is that + * Calcite considers any character literal as CHAR type by default, + * while Hive is more flexible and may consider them STRING, VARCHAR, + * or CHAR. 
+ */ + public static TypeInfo convertLiteralType(RexLiteral literal) { + if (literal.getType().getSqlTypeName() == SqlTypeName.CHAR) { + HiveNlsString string = (HiveNlsString) RexLiteral.value(literal); + if (string == null) { + // Original type + return TypeConverter.convertPrimitiveType(literal.getType()); + } + // Interpret + switch (string.interpretation) { + case STRING: + return TypeInfoFactory.stringTypeInfo; + case VARCHAR: + return TypeInfoFactory.getVarcharTypeInfo( + literal.getType().getPrecision()); + case CHAR: + return TypeInfoFactory.getCharTypeInfo( + literal.getType().getPrecision()); + } + } + return TypeConverter.convertPrimitiveType(literal.getType()); + } + public static TypeInfo convert(RelDataType rType) { if (rType.isStruct()) { return convertStructType(rType); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBProcFactory.java index eb0d8aaca8..b81e51ff86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBProcFactory.java @@ -42,7 +42,7 @@ @Override protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop, - TableScanOperator top) throws SemanticException, UDFArgumentException { + TableScanOperator top) throws SemanticException { LBOpWalkerCtx owc = (LBOpWalkerCtx) procCtx; // Otherwise this is not a sampling predicate and we need to ExprNodeDesc predicate = fop.getConf().getPredicate(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index ee3aaa5799..d412d3114a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -19,6 +19,7 @@ import com.google.common.base.Function; import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ImmutableBiMap; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; @@ -26,6 +27,8 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; +import java.util.Map.Entry; +import java.util.Optional; import java.util.regex.Pattern; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; @@ -78,6 +81,7 @@ import org.apache.calcite.rel.convert.ConverterImpl; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; @@ -100,9 +104,12 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexExecutor; +import org.apache.calcite.rex.RexFieldAccess; import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.schema.SchemaPlus; @@ -118,7 +125,6 @@ import org.apache.calcite.sql.SqlWindow; import org.apache.calcite.sql.dialect.HiveSqlDialect; import 
org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.Frameworks; @@ -147,11 +153,27 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.NotNullConstraint; import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.optimizer.calcite.*; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; @@ -164,6 +186,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRexExprList; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; @@ -242,10 +265,11 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.MaterializedViewRewritingRelVisitor; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; -import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; +import org.apache.hadoop.hive.ql.parse.type.FunctionHelper; +import org.apache.hadoop.hive.ql.parse.type.FunctionHelper.AggregateInfo; +import org.apache.hadoop.hive.ql.parse.type.HiveFunctionHelper; import org.apache.hadoop.hive.ql.parse.type.JoinTypeCheckCtx; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.PlanModifierForReturnPath; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; import 
org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter; @@ -259,26 +283,19 @@ import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; +import org.apache.hadoop.hive.ql.parse.type.RexNodeTypeCheck; import org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx; import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource; import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -1774,13 +1791,13 @@ private RowResolver genRowResolver(Operator op, QB qb) { private class CalcitePlannerAction implements Frameworks.PlannerAction { private RelOptCluster cluster; private RelOptSchema relOptSchema; + private FunctionHelper functionHelper; private final Map partitionCache; private final Map colStatsCache; private final ColumnAccessInfo columnAccessInfo; private Map viewProjectToTableSchema; - //correlated vars across subqueries within same query needs to have different ID - // this will be used in RexNodeConverter to create cor var + // correlated vars across subqueries within same query needs to have different ID private int subqueryId; // this is to keep track if a subquery is correlated and contains aggregate @@ -1819,6 +1836,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu this.cluster = optCluster; this.relOptSchema = relOptSchema; + this.functionHelper = new HiveFunctionHelper(rexBuilder); PerfLogger perfLogger = SessionState.getPerfLogger(); // 1. Gen Calcite Plan @@ -1836,7 +1854,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation"); // Create executor - RexExecutor executorProvider = new HiveRexExecutorImpl(optCluster); + RexExecutor executorProvider = new HiveRexExecutorImpl(); calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider); // We need to get the ColumnAccessInfo and viewToTableSchema for views. 
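The filter hunks in CalcitePlanner below drop RexNodeConverter's inline correlation handling in favor of generating RexNode expressions directly and then rewriting outer-column references with a dedicated RexShuttle (the CorrelationConverter added further down). For reference, a correlated reference to an outer column is represented as a field access on a correlation variable; a minimal sketch, assuming rexBuilder, the outer row type, the outer column position pos and subqueryId are in scope:

    CorrelationId corrId = new CorrelationId(subqueryId);             // e.g. $cor0
    RexNode corVar = rexBuilder.makeCorrel(outerRowType, corrId);     // correlation variable
    RexNode corField = rexBuilder.makeFieldAccess(corVar, pos);       // outer column access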
@@ -2734,18 +2752,13 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r } else if (unparseTranslator != null && unparseTranslator.isEnabled()) { genAllExprNodeDesc(joinCond, input, jCtx); } - Map exprNodes = ExprNodeTypeCheck.genExprNodeJoinCond( - joinCond, jCtx); + Map exprNodes = RexNodeTypeCheck.genExprNodeJoinCond( + joinCond, jCtx, cluster.getRexBuilder()); if (jCtx.getError() != null) { throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(), jCtx.getError())); } - ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond); - List inputRels = new ArrayList(); - inputRels.add(leftRel); - inputRels.add(rightRel); - calciteJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels, - relToHiveRR, relToHiveColNameCalcitePosMap, false); + calciteJoinCond = exprNodes.get(joinCond); } else { calciteJoinCond = cluster.getRexBuilder().makeLiteral(true); } @@ -2802,8 +2815,13 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r RexUtil.composeConjunction(cluster.getRexBuilder(), ImmutableList.of(remainingEquiCond, nonEquiConds), false) : nonEquiConds; + final RelDataType combinedRowType = SqlValidatorUtil.createJoinType( + cluster.getTypeFactory(), inputRels[0].getRowType(), inputRels[1].getRowType(), + null, ImmutableList.of()); topRel = HiveSemiJoin.getSemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - inputRels[0], inputRels[1], calciteJoinCond); + inputRels[0], inputRels[1], + HiveCalciteUtil.fixUp(cluster.getRexBuilder(), + calciteJoinCond, RelOptUtil.getFieldTypeList(combinedRowType))); // Create join RR: we need to check whether we need to update left RR in case // previous call to projectNonColumnEquiConditions updated it @@ -2845,7 +2863,14 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r LOG.warn("Duplicates detected when adding columns to RR: see previous message"); } } else { - topRel = HiveJoin.getJoin(cluster, leftRel, rightRel, calciteJoinCond, calciteJoinType); + final RelDataType combinedRowType = SqlValidatorUtil.createJoinType( + cluster.getTypeFactory(), leftRel.getRowType(), rightRel.getRowType(), + null, ImmutableList.of()); + topRel = HiveJoin.getJoin( + cluster, leftRel, rightRel, + HiveCalciteUtil.fixUp(cluster.getRexBuilder(), + calciteJoinCond, RelOptUtil.getFieldTypeList(combinedRowType)), + calciteJoinType); topRR = RowResolver.getCombinedRR(leftRR, rightRR); if (namedColumns != null) { List tableAliases = new ArrayList<>(); @@ -3235,13 +3260,13 @@ private TableType obtainTableType(Table tabMetaData) { return TableType.NATIVE; } - private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, + private RelNode genFilterRelNode(ASTNode filterNode, RelNode srcRel, ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean useCaching) throws SemanticException { - ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel), - outerRR, null, useCaching); - if (filterCondn instanceof ExprNodeConstantDesc - && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { + RexNode filterExpression = genRexNode(filterNode, relToHiveRR.get(srcRel), + outerRR, null, useCaching, cluster.getRexBuilder()); + if (RexUtil.isLiteral(filterExpression, false) + && filterExpression.getType().getSqlTypeName() != SqlTypeName.BOOLEAN) { // queries like select * from t1 where 'foo'; // Calcite's rule PushFilterThroughProject chokes on it. 
Arguably, we // can insert a cast to @@ -3252,16 +3277,16 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, throw new CalciteSemanticException("Filter expression with non-boolean return type.", UnsupportedFeature.Filter_expression_with_non_boolean_return_type); } - ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap - .get(srcRel); - RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), - outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR, - HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES), - 0, true, subqueryId).convert(filterCondn); - RexNode factoredFilterExpr = RexUtil - .pullFactors(cluster.getRexBuilder(), convertedFilterExpr); - RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - srcRel, factoredFilterExpr); + final ImmutableMap hiveColNameCalcitePosMap = + this.relToHiveColNameCalcitePosMap.get(srcRel); + filterExpression = new CorrelationConverter( + new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)), + outerNameToPosMap, outerRR, subqueryId).apply(filterExpression); + RexNode factoredFilterExpression = RexUtil + .pullFactors(cluster.getRexBuilder(), filterExpression); + RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), srcRel, + HiveCalciteUtil.fixUp(cluster.getRexBuilder(), + factoredFilterExpression, RelOptUtil.getFieldTypeList(srcRel.getRowType()))); this.relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); @@ -3269,6 +3294,60 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, return filterRel; } + /** + * Shuttle that replaces certain references with correlation variables + * if needed. + */ + private class CorrelationConverter extends RexShuttle { + private final InputContext inputContext; + private final ImmutableMap outerPositionToColumnName; + private final RowResolver outerRowResolver; + private final int correlatedId; + + private CorrelationConverter(InputContext inputContext, + ImmutableMap outerColumnNameToPosition, RowResolver outerRowResolver, + int correlatedId) { + this.inputContext = inputContext; + this.outerPositionToColumnName = outerColumnNameToPosition == null ? 
+ null : ImmutableBiMap.copyOf(outerColumnNameToPosition).inverse(); + this.outerRowResolver = outerRowResolver; + this.correlatedId = correlatedId; + } + + @Override + public RexNode visitInputRef(RexInputRef col) { + InputContext context = null; + if (inputContext.inputRowResolver == null) { + context = inputContext; + } else { + int index = col.getIndex(); + String colName = inputContext.positionToColumnName.get(index); + if (colName != null) { + context = inputContext; + } + } + + if(context == null) { + // we have correlated column, build data type from outer rr + RelDataType rowType; + try { + rowType = TypeConverter.getType(cluster, outerRowResolver, null); + } catch (CalciteSemanticException e) { + throw new RuntimeException("Error converting type", e); + } + int index = col.getIndex() - inputContext.inputRowType.getFieldList().size(); + if (outerPositionToColumnName.get(index) == null) { + throw new RuntimeException(ErrorMsg.INVALID_COLUMN_NAME.getMsg()); + } + CorrelationId colCorr = new CorrelationId(correlatedId); + RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr); + return cluster.getRexBuilder().makeFieldAccess(corExpr, index); + } + int pos = col.getIndex(); + return cluster.getRexBuilder().makeInputRef( + context.inputRowType.getFieldList().get(pos).getType(), pos); + } + } private RelNode genLateralViewPlans(ASTNode lateralView, Map aliasToRel) throws SemanticException { @@ -3324,19 +3403,18 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al // Output types. They will be the concatenation of the input refs types and // the types of the expressions for the lateral view generated rows // Generate all expressions from lateral view - ExprNodeDesc valuesExpr = genExprNodeDesc(valuesClause, inputRR, false, false); - RexCall convertedOriginalValuesExpr = (RexCall) new RexNodeConverter(this.cluster, inputRel.getRowType(), - inputPosMap, 0, false).convert(valuesExpr); - RelDataType valuesRowType = ((ArraySqlType) convertedOriginalValuesExpr.getType()).getComponentType(); + RexCall valuesExpr = (RexCall) genRexNode( + valuesClause, inputRR, false, false, cluster.getRexBuilder()); + RelDataType valuesRowType = valuesExpr.getType().getComponentType(); List newStructExprs = new ArrayList<>(); - for (RexNode structExpr : convertedOriginalValuesExpr.getOperands()) { + for (RexNode structExpr : valuesExpr.getOperands()) { RexCall structCall = (RexCall) structExpr; List exprs = new ArrayList<>(inputRefs); exprs.addAll(structCall.getOperands()); newStructExprs.add(rexBuilder.makeCall(structCall.op, exprs)); } RexNode convertedFinalValuesExpr = - rexBuilder.makeCall(convertedOriginalValuesExpr.op, newStructExprs); + rexBuilder.makeCall(valuesExpr.op, newStructExprs); // The return type will be the concatenation of input type and original values type RelDataType retType = SqlValidatorUtil.deriveJoinRowType(inputRel.getRowType(), valuesRowType, JoinRelType.INNER, dtFactory, null, ImmutableList.of()); @@ -3394,7 +3472,7 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al columnAliases.add(SemanticAnalyzer.getColumnInternalName(i)); } } - ListTypeInfo listTypeInfo = (ListTypeInfo) valuesExpr.getTypeInfo(); // Array should have ListTypeInfo + ListTypeInfo listTypeInfo = (ListTypeInfo) TypeConverter.convert(valuesExpr.getType()); // Array should have ListTypeInfo StructTypeInfo typeInfos = (StructTypeInfo) listTypeInfo.getListElementTypeInfo(); // Within the list, we extract types for (int i = 0, j = 0; i < columnAliases.size(); i++) { String 
internalColName; @@ -3470,26 +3548,21 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean forHavingClause) throws SemanticException { - - Map subQueryToRelNode = new HashMap<>(); - boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, - subQueryToRelNode); + final Map subQueryToRelNode = new HashMap<>(); + boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, subQueryToRelNode); if(isSubQuery) { - ExprNodeDesc subQueryExpr = genExprNodeDesc(searchCond, relToHiveRR.get(srcRel), - outerRR, subQueryToRelNode, forHavingClause); + RexNode filterExpression = genRexNode(searchCond, relToHiveRR.get(srcRel), + outerRR, subQueryToRelNode, forHavingClause, cluster.getRexBuilder()); ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap - .get(srcRel); - RexNode convertedFilterLHS = new RexNodeConverter(cluster, srcRel.getRowType(), - outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), - outerRR, HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES), - 0, true, subqueryId).convert(subQueryExpr); + .get(srcRel); + filterExpression = new CorrelationConverter( + new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)), + outerNameToPosMap, outerRR, subqueryId).apply(filterExpression); RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - srcRel, convertedFilterLHS); - - this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap - .get(srcRel)); + srcRel, filterExpression); + relToHiveColNameCalcitePosMap.put(filterRel, relToHiveColNameCalcitePosMap.get(srcRel)); relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); return filterRel; } else { @@ -3511,42 +3584,17 @@ private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, ImmutableMap m_aggParams; - private final TypeInfo m_returnType; - private final String m_udfName; - private final boolean m_distinct; - - private AggInfo(List aggParams, TypeInfo returnType, String udfName, - boolean isDistinct) { - m_aggParams = aggParams; - m_returnType = returnType; - m_udfName = udfName; - m_distinct = isDistinct; - } - } - - private AggregateCall convertGBAgg(AggInfo agg, List gbChildProjLst, RexNodeConverter converter, + private AggregateCall convertGBAgg(AggregateInfo agg, List gbChildProjLst, HashMap rexNodeToPosMap, Integer childProjLstIndx) throws SemanticException { - // 1. Get agg fn ret type in Calcite - RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType, + RelDataType aggFnRetType = TypeConverter.convert(agg.getReturnType(), this.cluster.getTypeFactory()); // 2. Convert Agg Fn args and type of args to Calcite - // TODO: Does HQL allows expressions as aggregate args or can it only be - // projections from child? 
- Integer inputIndx; - List argList = new ArrayList(); - RexNode rexNd = null; - RelDataTypeFactory dtFactory = this.cluster.getTypeFactory(); - ImmutableList.Builder aggArgRelDTBldr = new ImmutableList.Builder(); - for (ExprNodeDesc expr : agg.m_aggParams) { - rexNd = converter.convert(expr); - inputIndx = rexNodeToPosMap.get(rexNd.toString()); + List argList = new ArrayList<>(); + ImmutableList.Builder aggArgRelDTBldr = ImmutableList.builder(); + for (RexNode rexNd : agg.getParameters()) { + Integer inputIndx = rexNodeToPosMap.get(rexNd.toString()); if (inputIndx == null) { gbChildProjLst.add(rexNd); rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx); @@ -3555,35 +3603,28 @@ private AggregateCall convertGBAgg(AggInfo agg, List gbChildProjLst, Re } argList.add(inputIndx); - // TODO: does arg need type cast? - aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory)); + aggArgRelDTBldr.add(rexNd.getType()); } // 3. Get Aggregation FN from Calcite given name, ret type and input arg // type - final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName, agg.m_distinct, + final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.getAggregateName(), agg.isDistinct(), aggArgRelDTBldr.build(), aggFnRetType); - return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null); + return new AggregateCall(aggregation, agg.isDistinct(), argList, aggFnRetType, null); } - private RelNode genGBRelNode(List gbExprs, List aggInfoLst, + private RelNode genGBRelNode(List gbExprs, List aggInfoLst, List groupSets, RelNode srcRel) throws SemanticException { - ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); - RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), posMap, - 0, false); - final boolean hasGroupSets = groupSets != null && !groupSets.isEmpty(); final List gbChildProjLst = Lists.newArrayList(); - final HashMap rexNodeToPosMap = new HashMap(); + final HashMap rexNodeToPosMap = new HashMap<>(); final List groupSetPositions = Lists.newArrayList(); Integer gbIndx = 0; - RexNode rnd; - for (ExprNodeDesc key : gbExprs) { - rnd = converter.convert(key); - gbChildProjLst.add(rnd); + for (RexNode gbExpr : gbExprs) { + gbChildProjLst.add(gbExpr); groupSetPositions.add(gbIndx); - rexNodeToPosMap.put(rnd.toString(), gbIndx); + rexNodeToPosMap.put(gbExpr.toString(), gbIndx); gbIndx++; } final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions); @@ -3593,19 +3634,19 @@ private RelNode genGBRelNode(List gbExprs, List aggInfoLs List transformedGroupSets = null; if(hasGroupSets) { Set setTransformedGroupSets = - new HashSet(groupSets.size()); + new HashSet<>(groupSets.size()); for(long val: groupSets) { setTransformedGroupSets.add(convert(val, groupSet.cardinality())); } // Calcite expects the grouping sets sorted and without duplicates - transformedGroupSets = new ArrayList(setTransformedGroupSets); + transformedGroupSets = new ArrayList<>(setTransformedGroupSets); Collections.sort(transformedGroupSets, ImmutableBitSet.COMPARATOR); } List aggregateCalls = Lists.newArrayList(); - for (AggInfo agg : aggInfoLst) { - aggregateCalls.add(convertGBAgg(agg, gbChildProjLst, converter, rexNodeToPosMap, - gbChildProjLst.size())); + for (AggregateInfo agg : aggInfoLst) { + aggregateCalls.add( + convertGBAgg(agg, gbChildProjLst, rexNodeToPosMap, gbChildProjLst.size())); } if (hasGroupSets) { // Create GroupingID column @@ -3621,7 +3662,12 @@ private RelNode genGBRelNode(List gbExprs, 
List aggInfoLs // first element from srcRel gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0)); } - RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null); + + // Create input project fixing up nullability of inputs + RelNode gbInputRel = HiveProject.create( + srcRel, + HiveCalciteUtil.fixUp(cluster.getRexBuilder(), gbChildProjLst, RelOptUtil.getFieldTypeList(srcRel.getRowType())), + null); HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, groupSet, transformedGroupSets, aggregateCalls); @@ -3673,120 +3719,59 @@ private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, } private void addToGBExpr(RowResolver groupByOutputRowResolver, - RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc, - List gbExprNDescLst, List outputColumnNames) { - // TODO: Should we use grpbyExprNDesc.getTypeInfo()? what if expr is - // UDF + RowResolver groupByInputRowResolver, ASTNode grpbyExpr, RexNode grpbyExprNDesc, + List gbExprNDescLst, List outputColumnNames) { int i = gbExprNDescLst.size(); String field = SemanticAnalyzer.getColumnInternalName(i); outputColumnNames.add(field); gbExprNDescLst.add(grpbyExprNDesc); - ColumnInfo oColInfo = new ColumnInfo(field, grpbyExprNDesc.getTypeInfo(), null, false); + ColumnInfo oColInfo = new ColumnInfo(field, TypeConverter.convert(grpbyExprNDesc.getType()), null, false); groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo); addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver, groupByOutputRowResolver); } - private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) + private AggregateInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) throws SemanticException { - AggInfo aInfo = null; - - // 1 Convert UDAF Params to ExprNodeDesc - ArrayList aggParameters = new ArrayList(); + List aggParameters = new ArrayList<>(); for (int i = 1; i <= aggFnLstArgIndx; i++) { - ASTNode paraExpr = (ASTNode) aggAst.getChild(i); - ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); - aggParameters.add(paraExprNode); + RexNode parameterExpr = genRexNode( + (ASTNode) aggAst.getChild(i), inputRR, cluster.getRexBuilder()); + aggParameters.add(parameterExpr); } - - // 2. Is this distinct UDAF boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI; - - // 3. 
Determine type of UDAF - TypeInfo udafRetType = null; - - // 3.1 Obtain UDAF name + boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; String aggName = unescapeIdentifier(aggAst.getChild(0).getText()); - // 3.2 Rank functions type is 'int'/'double' - if (FunctionRegistry.isRankingFunction(aggName)) { - if (aggName.equalsIgnoreCase("percent_rank")) { - udafRetType = TypeInfoFactory.doubleTypeInfo; - } else { - udafRetType = TypeInfoFactory.intTypeInfo; - } - } else { - // 3.3 Try obtaining UDAF evaluators to determine the ret type - try { - boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; - - // 3.3.1 Get UDAF Evaluator - Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, - isDistinct); - - GenericUDAFEvaluator genericUDAFEvaluator = null; - if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) - || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { - List originalParameterTypeInfos = SemanticAnalyzer - .getWritableObjectInspector(aggParameters); - genericUDAFEvaluator = FunctionRegistry.getGenericWindowingEvaluator(aggName, - originalParameterTypeInfos, isDistinct, isAllColumns); - GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, - aggParameters); - udafRetType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo(); - } else { - genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(aggName, aggParameters, - aggAst, isDistinct, isAllColumns); - assert (genericUDAFEvaluator != null); - - // 3.3.2 Get UDAF Info using UDAF Evaluator - GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, - aggParameters); - if (FunctionRegistry.pivotResult(aggName)) { - udafRetType = ((ListTypeInfo)udaf.returnType).getListElementTypeInfo(); - } else { - udafRetType = udaf.returnType; - } - } - } catch (Exception e) { - LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName - + ", trying to translate to GenericUDF"); - } + AggregateInfo aInfo = functionHelper.getWindowAggregateFunctionInfo( + isDistinct, isAllColumns, aggName, aggParameters); - // 3.4 Try GenericUDF translation - if (udafRetType == null) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - // We allow stateful functions in the SELECT list (but nowhere else) - tcCtx.setAllowStatefulFunctions(true); - tcCtx.setAllowDistinctFunctions(false); - ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx); - udafRetType = exp.getTypeInfo(); - } + // If that did not work, try GenericUDF translation + if (aInfo == null) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder()); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + tcCtx.setAllowDistinctFunctions(false); + RexNode exp = genRexNode((ASTNode) aggAst.getChild(0), inputRR, tcCtx); + aInfo = new AggregateInfo( + aggParameters, TypeConverter.convert(exp.getType()), aggName, isDistinct); } - // 4. Construct AggInfo - aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct); - return aInfo; } /** - * Generate GB plan. - * - * @param qb - * @param srcRel - * @return TODO: 1. Grouping Sets (roll up..) - * @throws SemanticException + * Generate a group by plan. */ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { - RelNode gbRel = null; + RelNode groupByRel = null; QBParseInfo qbp = getQBParseInfo(qb); // 1. 
Gather GB Expressions (AST) (GB + Aggregations) // NOTE: Multi Insert is not supported - String detsClauseName = qbp.getClauseNames().iterator().next(); + String destClauseName = qbp.getClauseNames().iterator().next(); // Check and transform group by *. This will only happen for select distinct *. // Here the "genSelectPlan" is being leveraged. // The main benefits are (1) remove virtual columns that should @@ -3794,7 +3779,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException // so that view is supported. The drawback is that an additional SEL op is added. If it is // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match // SEL%SEL% rule. - ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName); + ASTNode selExprList = qb.getParseInfo().getSelForClause(destClauseName); SubQueryUtils.checkForTopLevelSubqueries(selExprList); if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) { @@ -3802,8 +3787,8 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { // As we said before, here we use genSelectLogicalPlan to rewrite AllColRef srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null, null, true).getKey(); - RowResolver rr = this.relToHiveRR.get(srcRel); - qbp.setSelExprForClause(detsClauseName, genSelectDIAST(rr)); + RowResolver rr = relToHiveRR.get(srcRel); + qbp.setSelExprForClause(destClauseName, genSelectDIAST(rr)); } } @@ -3813,18 +3798,17 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException return null; } - List grpByAstExprs = getGroupByForClause(qbp, detsClauseName); - Map aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); - boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; - boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true - : false; + List groupByNodes = getGroupByForClause(qbp, destClauseName); + Map aggregationTrees = qbp.getAggregationExprsForClause(destClauseName); + boolean hasGrpByAstExprs = groupByNodes != null && !groupByNodes.isEmpty(); + boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty(); final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty() || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty()); // 2. 
Sanity check if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) - && qbp.getDistinctFuncExprsForClause(detsClauseName).size() > 1) { + && qbp.getDistinctFuncExprsForClause(destClauseName).size() > 1) { throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg()); } if (cubeRollupGrpSetPresent) { @@ -3833,9 +3817,9 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException } if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { - checkExpressionsForGroupingSet(grpByAstExprs, qb.getParseInfo() - .getDistinctFuncExprsForClause(detsClauseName), aggregationTrees, - this.relToHiveRR.get(srcRel)); + checkExpressionsForGroupingSet(groupByNodes, + qb.getParseInfo().getDistinctFuncExprsForClause(destClauseName), + aggregationTrees, relToHiveRR.get(srcRel)); if (qbp.getDestGroupingSets().size() > conf .getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY)) { @@ -3847,10 +3831,9 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException } } - if (hasGrpByAstExprs || hasAggregationTrees) { - ArrayList gbExprNDescLst = new ArrayList(); - ArrayList outputColumnNames = new ArrayList(); + List groupByExpressions = new ArrayList<>(); + List outputColumnNames = new ArrayList<>(); // 3. Input, Output Row Resolvers RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel); @@ -3859,29 +3842,30 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException if (hasGrpByAstExprs) { // 4. Construct GB Keys (ExprNode) - for (int i = 0; i < grpByAstExprs.size(); ++i) { - ASTNode grpbyExpr = grpByAstExprs.get(i); - Map astToExprNDescMap = genAllExprNodeDesc(grpbyExpr, groupByInputRowResolver); - ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr); - if (grpbyExprNDesc == null) { - throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump(), + for (int i = 0; i < groupByNodes.size(); ++i) { + ASTNode groupByNode = groupByNodes.get(i); + Map astToRexNodeMap = genAllRexNode( + groupByNode, groupByInputRowResolver, cluster.getRexBuilder()); + RexNode groupByExpression = astToRexNodeMap.get(groupByNode); + if (groupByExpression == null) { + throw new CalciteSemanticException("Invalid Column Reference: " + groupByNode.dump(), UnsupportedFeature.Invalid_column_reference); } - addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr, - grpbyExprNDesc, gbExprNDescLst, outputColumnNames); + addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, groupByNode, + groupByExpression, groupByExpressions, outputColumnNames); } } // 5. GroupingSets, Cube, Rollup - int groupingColsSize = gbExprNDescLst.size(); + int groupingColsSize = groupByExpressions.size(); List groupingSets = null; if (cubeRollupGrpSetPresent) { - groupingSets = getGroupByGroupingSetsForClause(qbp, detsClauseName).getRight(); + groupingSets = getGroupByGroupingSetsForClause(qbp, destClauseName).getRight(); } // 6. 
Construct aggregation function Info - ArrayList aggregations = new ArrayList(); + ArrayList aggregations = new ArrayList(); if (hasAggregationTrees) { assert (aggregationTrees != null); for (ASTNode value : aggregationTrees.values()) { @@ -3892,26 +3876,20 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; // 6.2 Convert UDAF Params to ExprNodeDesc - ArrayList aggParameters = new ArrayList(); + List aggParameters = new ArrayList<>(); for (int i = 1; i < value.getChildCount(); i++) { - ASTNode paraExpr = (ASTNode) value.getChild(i); - ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); - aggParameters.add(paraExprNode); + RexNode parameterExpr = genRexNode( + (ASTNode) value.getChild(i), groupByInputRowResolver, cluster.getRexBuilder()); + aggParameters.add(parameterExpr); } - Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, - isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator( - aggName, aggParameters, value, isDistinct, isAllColumns); - assert (genericUDAFEvaluator != null); - GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, - aggParameters); - AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct); + AggregateInfo aInfo = functionHelper.getAggregateFunctionInfo( + isDistinct, isAllColumns, aggName, aggParameters); aggregations.add(aInfo); String field = getColumnInternalName(groupingColsSize + aggregations.size() - 1); outputColumnNames.add(field); - groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType, - "", false)); + groupByOutputRowResolver.putExpression(value, + new ColumnInfo(field, aInfo.getReturnType(), "", false)); } } @@ -3928,12 +3906,12 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException } // 8. 
We create the group_by operator - gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel); - relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver)); - this.relToHiveRR.put(gbRel, groupByOutputRowResolver); + groupByRel = genGBRelNode(groupByExpressions, aggregations, groupingSets, srcRel); + relToHiveColNameCalcitePosMap.put(groupByRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver)); + relToHiveRR.put(groupByRel, groupByOutputRowResolver); } - return gbRel; + return groupByRel; } /** @@ -3948,8 +3926,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException * @throws SemanticException */ private RelNode genOBLogicalPlan(QB qb, Pair selPair, - boolean outermostOB) throws SemanticException { - + boolean outermostOB) throws SemanticException { QBParseInfo qbp = getQBParseInfo(qb); String dest = qbp.getClauseNames().iterator().next(); ASTNode obAST = qbp.getOrderByForClause(dest); @@ -3982,8 +3959,7 @@ private RelNode genOBLogicalPlan(QB qb, Pair selPair, } private RelNode genSBLogicalPlan(QB qb, Pair selPair, - boolean outermostOB) throws SemanticException { - + boolean outermostOB) throws SemanticException { QBParseInfo qbp = getQBParseInfo(qb); String dest = qbp.getClauseNames().iterator().next(); ASTNode sbAST = qbp.getSortByForClause(dest); @@ -4023,7 +3999,7 @@ private RelNode genSBLogicalPlan(QB qb, Pair selPair, // - Add Child Project Rel if needed, // - Generate Output RR, input Sel Rel for top constraining Sel private OBLogicalPlanGenState beginGenOBLogicalPlan( - ASTNode obAST, Pair selPair, boolean outermostOB) throws SemanticException { + ASTNode obAST, Pair selPair, boolean outermostOB) throws SemanticException { // selPair.getKey() is the operator right before OB // selPair.getValue() is RR which only contains columns needed in result // set. Extra columns needed by order by will be absent from it. 
@@ -4041,15 +4017,12 @@ private OBLogicalPlanGenState beginGenOBLogicalPlan( RowResolver inputRR = relToHiveRR.get(srcRel); RowResolver outputRR = new RowResolver(); - RexNode rnd; - RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), - relToHiveColNameCalcitePosMap.get(srcRel), 0, false); int srcRelRecordSz = srcRel.getRowType().getFieldCount(); for (int i = 0; i < obASTExprLst.size(); i++) { // 2.1 Convert AST Expr to ExprNode - ASTNode obASTExpr = (ASTNode) obASTExprLst.get(i); - ASTNode nullObASTExpr = (ASTNode) obASTExpr.getChild(0); + ASTNode orderByNode = (ASTNode) obASTExprLst.get(i); + ASTNode nullObASTExpr = (ASTNode) orderByNode.getChild(0); ASTNode ref = (ASTNode) nullObASTExpr.getChild(0); boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); @@ -4066,25 +4039,24 @@ private OBLogicalPlanGenState beginGenOBLogicalPlan( "the position alias will be ignored."); } } else { - ExprNodeDesc obExprNDesc = getOrderByExprNodeDesc(selectOutputRR, inputRR, obASTExpr, ref); // 2.2 Convert ExprNode to RexNode - rnd = converter.convert(obExprNDesc); + RexNode orderByExpression = getOrderByExpression(selectOutputRR, inputRR, orderByNode, ref); // 2.3 Determine the index of ob expr in child schema // NOTE: Calcite can not take compound exprs in OB without it being // present in the child (& hence we add a child Project Rel) - if (rnd instanceof RexInputRef) { - fieldIndex = ((RexInputRef) rnd).getIndex(); + if (orderByExpression instanceof RexInputRef) { + fieldIndex = ((RexInputRef) orderByExpression).getIndex(); } else { fieldIndex = srcRelRecordSz + newVCLst.size(); - newVCLst.add(rnd); - vcASTTypePairs.add(new Pair<>(ref, obExprNDesc.getTypeInfo())); + newVCLst.add(orderByExpression); + vcASTTypePairs.add(new Pair<>(ref, TypeConverter.convert(orderByExpression.getType()))); } } // 2.4 Determine the Direction of order by RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; - if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { + if (orderByNode.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { order = RelFieldCollation.Direction.ASCENDING; } RelFieldCollation.NullDirection nullOrder; @@ -4146,16 +4118,16 @@ private OBLogicalPlanGenState beginGenOBLogicalPlan( return new OBLogicalPlanGenState(obInputRel, fieldCollations, selectOutputRR, outputRR, srcRel); } - private ExprNodeDesc getOrderByExprNodeDesc( - RowResolver selectOutputRR, RowResolver inputRR, ASTNode obASTExpr, ASTNode ref) - throws SemanticException { + private RexNode getOrderByExpression( + RowResolver selectOutputRR, RowResolver inputRR, ASTNode orderByNode, ASTNode ref) + throws SemanticException { // first try to get it from select // in case of udtf, selectOutputRR may be null. - ExprNodeDesc obExprNDesc = null; + RexNode orderByExpression = null; if (selectOutputRR != null) { try { - Map astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR); - obExprNDesc = astToExprNDescMap.get(ref); + Map astToExprNDescMap = genAllRexNode(ref, selectOutputRR, cluster.getRexBuilder()); + orderByExpression = astToExprNDescMap.get(ref); } catch (SemanticException ex) { // we can tolerate this as this is the previous behavior LOG.debug("Can not find column in " + ref.getText() + ". 
The error msg is " @@ -4163,14 +4135,14 @@ private ExprNodeDesc getOrderByExprNodeDesc( } } // then try to get it from all - if (obExprNDesc == null) { - Map astToExprNDescMap = genAllExprNodeDesc(ref, inputRR); - obExprNDesc = astToExprNDescMap.get(ref); + if (orderByExpression == null) { + Map astToExprNDescMap = genAllRexNode(ref, inputRR, cluster.getRexBuilder()); + orderByExpression = astToExprNDescMap.get(ref); } - if (obExprNDesc == null) { - throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); + if (orderByExpression == null) { + throw new SemanticException("Invalid order by expression: " + orderByNode.toString()); } - return obExprNDesc; + return orderByExpression; } // SELECT a, b FROM t ORDER BY 1 @@ -4249,32 +4221,31 @@ private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticExcept return sortRel; } - private List getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, + private List getPartitionKeys(PartitionSpec ps, RowResolver inputRR) throws SemanticException { - List pKeys = new ArrayList(); + List pKeys = new ArrayList<>(); if (ps != null) { List pExprs = ps.getExpressions(); for (PartitionExpression pExpr : pExprs) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder()); tcCtx.setAllowStatefulFunctions(true); - ExprNodeDesc exp = genExprNodeDesc(pExpr.getExpression(), inputRR, tcCtx); - pKeys.add(converter.convert(exp)); + RexNode exp = genRexNode(pExpr.getExpression(), inputRR, tcCtx); + pKeys.add(exp); } } return pKeys; } - private List getOrderKeys(OrderSpec os, RexNodeConverter converter, + private List getOrderKeys(OrderSpec os, RowResolver inputRR) throws SemanticException { - List oKeys = new ArrayList(); + List oKeys = new ArrayList<>(); if (os != null) { List oExprs = os.getExpressions(); for (OrderExpression oExpr : oExprs) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder()); tcCtx.setAllowStatefulFunctions(true); - ExprNodeDesc exp = genExprNodeDesc(oExpr.getExpression(), inputRR, tcCtx); - RexNode ordExp = converter.convert(exp); + RexNode ordExp = genRexNode(oExpr.getExpression(), inputRR, tcCtx); Set flags = new HashSet(); if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC) { flags.add(SqlKind.DESCENDING); @@ -4360,50 +4331,44 @@ private int getWindowSpecIndx(ASTNode wndAST) { // TODO: do we need to get to child? int wndSpecASTIndx = getWindowSpecIndx(windowProjAst); // 2. Get Hive Aggregate Info - AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, + AggregateInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, this.relToHiveRR.get(srcRel)); // 3. Get Calcite Return type for Agg Fn - wHiveRetType = hiveAggInfo.m_returnType; - RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType, + wHiveRetType = hiveAggInfo.getReturnType(); + RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.getReturnType(), this.cluster.getTypeFactory()); // 4. Convert Agg Fn args to Calcite - ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); - RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), - posMap, 0, false); - Builder calciteAggFnArgsBldr = ImmutableList. builder(); - Builder calciteAggFnArgsTypeBldr = ImmutableList. 
builder(); - for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) { - calciteAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i))); - calciteAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i) - .getTypeInfo(), this.cluster.getTypeFactory())); + List calciteAggFnArgs = hiveAggInfo.getParameters(); + Builder calciteAggFnArgsTypeBldr = ImmutableList.builder(); + for (int i = 0; i < hiveAggInfo.getParameters().size(); i++) { + calciteAggFnArgsTypeBldr.add(hiveAggInfo.getParameters().get(i).getType()); } - ImmutableList calciteAggFnArgs = calciteAggFnArgsBldr.build(); ImmutableList calciteAggFnArgsType = calciteAggFnArgsTypeBldr.build(); // 5. Get Calcite Agg Fn final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn( - hiveAggInfo.m_udfName, hiveAggInfo.m_distinct, calciteAggFnArgsType, calciteAggFnRetType); + hiveAggInfo.getAggregateName(), hiveAggInfo.isDistinct(), calciteAggFnArgsType, calciteAggFnRetType); // 6. Translate Window spec RowResolver inputRR = relToHiveRR.get(srcRel); WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec(); - List partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR); - List orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR); + List partitionKeys = getPartitionKeys(wndSpec.getPartition(), inputRR); + List orderKeys = getOrderKeys(wndSpec.getOrder(), inputRR); RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getStart()); RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getEnd()); boolean isRows = wndSpec.getWindowFrame().getWindowType() == WindowType.ROWS; w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs, partitionKeys, ImmutableList. copyOf(orderKeys), lowerBound, - upperBound, isRows, true, false, hiveAggInfo.m_distinct); + upperBound, isRows, true, false, hiveAggInfo.isDistinct()); } else { // TODO: Convert to Semantic Exception throw new RuntimeException("Unsupported window Spec"); } - return new Pair(w, wHiveRetType); + return new Pair<>(w, wHiveRetType); } private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet newColumns) @@ -4469,12 +4434,12 @@ private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rw private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch, RelNode srcRel, List windowExpressions) throws CalciteSemanticException { // 1. Build Column Names - Set colNamesSet = new HashSet(); + Set colNamesSet = new HashSet<>(); List cInfoLst = out_rwsch.getRowSchema().getSignature(); - ArrayList columnNames = new ArrayList(); + List columnNames = new ArrayList<>(); Map windowToAlias = null; if (windowExpressions != null ) { - windowToAlias = new HashMap(); + windowToAlias = new HashMap<>(); for (WindowExpressionSpec wes : windowExpressions) { windowToAlias.put(wes.getExpression().toStringTree().toLowerCase(), wes.getAlias()); } @@ -4515,8 +4480,11 @@ private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rw } // 3 Build Calcite Rel Node for project using converted projections & col - // names - HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames); + // names. Fix nullability + HiveRelNode selRel = HiveProject.create( + srcRel, + HiveCalciteUtil.fixUp(cluster.getRexBuilder(), calciteColLst, RelOptUtil.getFieldTypeList(srcRel.getRowType())), + columnNames); // 4. 
Keep track of colname-to-posmap && RR for new select this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch)); @@ -4577,7 +4545,7 @@ private void setQueryHints(QB qb) throws SemanticException { RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns); srcRel = (selForWindow == null) ? srcRel : selForWindow; - ArrayList col_list = new ArrayList(); + List columnList = new ArrayList<>(); // 1. Get Select Expression List QBParseInfo qbp = getQBParseInfo(qb); @@ -4591,7 +4559,7 @@ private void setQueryHints(QB qb) throws SemanticException { || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty()); // 2.Row resolvers for input, output - RowResolver out_rwsch = new RowResolver(); + RowResolver outputRR = new RowResolver(); Integer pos = Integer.valueOf(0); // TODO: will this also fix windowing? try RowResolver inputRR = this.relToHiveRR.get(srcRel), starRR = inputRR; @@ -4602,7 +4570,6 @@ private void setQueryHints(QB qb) throws SemanticException { // 3. Query Hints // TODO: Handle Query Hints; currently we ignore them - boolean selectStar = false; int posn = 0; boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT); if (hintPresent) { @@ -4635,9 +4602,9 @@ private void setQueryHints(QB qb) throws SemanticException { if (!fi.isNative()) { unparseTranslator.addIdentifierTranslation((ASTNode) expr.getChild(0)); } - if (genericUDTF != null && (selectStar = exprType == HiveParser.TOK_FUNCTIONSTAR)) { - genColListRegex(".*", null, (ASTNode) expr.getChild(0), - col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); + if (genericUDTF != null && exprType == HiveParser.TOK_FUNCTIONSTAR) { + genRexNodeRegex(".*", null, (ASTNode) expr.getChild(0), + columnList, null, inputRR, starRR, pos, outputRR, qb.getAliases(), false); } } } @@ -4727,13 +4694,14 @@ private void setQueryHints(QB qb) throws SemanticException { boolean isSubQuery = genSubQueryRelNode(qb, expr, srcRel, false, subQueryToRelNode); if(isSubQuery) { - ExprNodeDesc subQueryExpr = genExprNodeDesc(expr, relToHiveRR.get(srcRel), - outerRR, subQueryToRelNode, true); - col_list.add(subQueryExpr); - + RexNode subQueryExpr = genRexNode(expr, relToHiveRR.get(srcRel), + outerRR, subQueryToRelNode, true, cluster.getRexBuilder()); + columnList.add(subQueryExpr); ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), - subQueryExpr.getWritableObjectInspector(), tabAlias, false); - if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) { + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + TypeConverter.convert(subQueryExpr.getType())), + tabAlias, false); + if (!outputRR.putWithCheck(tabAlias, colAlias, null, colInfo)) { throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "." + colAlias + " => " + colInfo + " due to duplication, see previous warnings", UnsupportedFeature.Duplicates_in_RR); @@ -4743,10 +4711,9 @@ private void setQueryHints(QB qb) throws SemanticException { // 6.4 Build ExprNode corresponding to colums if (expr.getType() == HiveParser.TOK_ALLCOLREF) { - pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : - getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, - excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); - selectStar = true; + pos = genRexNodeRegex(".*", + expr.getChildCount() == 0 ? 
null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), + expr, columnList, excludedColumns, inputRR, starRR, pos, outputRR, qb.getAliases(), true); } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause && !inputRR.getIsExprResolver() @@ -4755,8 +4722,8 @@ private void setQueryHints(QB qb) throws SemanticException { // In case the expression is a regex COL. // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, col_list, excludedColumns, - inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); + pos = genRexNodeRegex(unescapeIdentifier(expr.getChild(0).getText()), null, + expr, columnList, excludedColumns, inputRR, starRR, pos, outputRR, qb.getAliases(), true); } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0) @@ -4768,11 +4735,11 @@ private void setQueryHints(QB qb) throws SemanticException { // In case the expression is TABLE.COL (col can be regex). // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex( + pos = genRexNodeRegex( unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), - expr, col_list, excludedColumns, inputRR, starRR, pos, - out_rwsch, qb.getAliases(), true); + expr, columnList, excludedColumns, inputRR, starRR, pos, + outputRR, qb.getAliases(), true); } else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI) && !(srcRel instanceof HiveAggregate)) { // Likely a malformed query eg, select hash(distinct c1) from t1; @@ -4780,7 +4747,7 @@ private void setQueryHints(QB qb) throws SemanticException { UnsupportedFeature.Distinct_without_an_aggreggation); } else { // Case when this is an expression - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder()); // We allow stateful functions in the SELECT list (but nowhere else) tcCtx.setAllowStatefulFunctions(true); if (!qbp.getDestToGroupBy().isEmpty()) { @@ -4788,35 +4755,36 @@ private void setQueryHints(QB qb) throws SemanticException { expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr, !cubeRollupGrpSetPresent); } - ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); - String recommended = recommendName(exp, colAlias); - if (recommended != null && out_rwsch.get(null, recommended) == null) { + RexNode expression = genRexNode(expr, inputRR, tcCtx); + + String recommended = recommendName(expression, colAlias, inputRR); + if (recommended != null && outputRR.get(null, recommended) == null) { colAlias = recommended; } - col_list.add(exp); + columnList.add(expression); + TypeInfo typeInfo = expression.isA(SqlKind.LITERAL) ? + TypeConverter.convertLiteralType((RexLiteral) expression) : + TypeConverter.convert(expression.getType()); ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), - exp.getWritableObjectInspector(), tabAlias, false); - colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? 
((ExprNodeColumnDesc) exp) - .isSkewedCol() : false); - out_rwsch.put(tabAlias, colAlias, colInfo); + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo), + tabAlias, false); + outputRR.put(tabAlias, colAlias, colInfo); pos = Integer.valueOf(pos.intValue() + 1); } } } - selectStar = selectStar && exprList.getChildCount() == posn + 1; - // 7. Convert Hive projections to Calcite - List calciteColLst = new ArrayList(); - - RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(), - outerNameToPosMap, buildHiveColNameToInputPosMap(col_list, inputRR), relToHiveRR.get(srcRel), - outerRR, HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES), - 0, false, subqueryId); - for (ExprNodeDesc colExpr : col_list) { - calciteColLst.add(rexNodeConv.convert(colExpr)); - } + // 7. For correlated queries + ImmutableMap hiveColNameCalcitePosMap = + buildHiveColNameToInputPosMap(columnList, inputRR); + CorrelationConverter cc = new CorrelationConverter( + new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)), + outerNameToPosMap, outerRR, subqueryId); + columnList = columnList.stream() + .map(cc::apply) + .collect(Collectors.toList()); // 8. Build Calcite Rel RelNode outputRel = null; @@ -4828,7 +4796,7 @@ private void setQueryHints(QB qb) throws SemanticException { // In OP return path, we need to generate a SEL and then a UDTF // following old semantic analyzer. outputRel = genUDTFPlan(genericUDTF, genericUDTFName, udtfTableAlias, udtfColAliases, qb, - calciteColLst, out_rwsch, srcRel); + columnList, outputRR, srcRel); } else { String dest = qbp.getClauseNames().iterator().next(); ASTNode obAST = qbp.getOrderByForClause(dest); @@ -4863,27 +4831,27 @@ public RexNode apply(RelDataTypeField input) { return new RexInputRef(input.getIndex(), input.getType()); } }); - originalRR = out_rwsch.duplicate(); + originalRR = outputRR.duplicate(); for (int i = 0; i < inputRR.getColumnInfos().size(); i++) { ColumnInfo colInfo = new ColumnInfo(inputRR.getColumnInfos().get(i)); - String internalName = SemanticAnalyzer.getColumnInternalName(out_rwsch.getColumnInfos() + String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos() .size() + i); colInfo.setInternalName(internalName); // if there is any confict, then we do not generate it in the new select // otherwise, we add it into the calciteColLst and generate the new select - if (!out_rwsch.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName, + if (!outputRR.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName, colInfo)) { LOG.trace("Column already present in RR. skipping."); } else { - calciteColLst.add(originalInputRefs.get(i)); + columnList.add(originalInputRefs.get(i)); } } - outputRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); + outputRel = genSelectRelNode(columnList, outputRR, srcRel); // outputRel is the generated augmented select with extra unselected // columns, and originalRR is the original generated select return new Pair(outputRel, originalRR); } else { - outputRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); + outputRel = genSelectRelNode(columnList, outputRR, srcRel); } } // 9. 
Handle select distinct as GBY if there exist windowing functions @@ -4892,8 +4860,8 @@ public RexNode apply(RelDataTypeField input) { outputRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), outputRel, groupSet, null, new ArrayList()); RowResolver groupByOutputRowResolver = new RowResolver(); - for (int i = 0; i < out_rwsch.getColumnInfos().size(); i++) { - ColumnInfo colInfo = out_rwsch.getColumnInfos().get(i); + for (int i = 0; i < outputRR.getColumnInfos().size(); i++) { + ColumnInfo colInfo = outputRR.getColumnInfos().get(i); ColumnInfo newColInfo = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()); groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo); @@ -4906,6 +4874,19 @@ public RexNode apply(RelDataTypeField input) { return new Pair(outputRel, null); } + Integer genRexNodeRegex(String colRegex, String tabAlias, ASTNode sel, + List exprList, Set excludeCols, RowResolver input, + RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, + boolean ensureUniqueCols) throws SemanticException { + List> colList = new ArrayList<>(); + Integer i = genColListRegex(colRegex, tabAlias, sel, + colList, excludeCols, input, colSrcRR, pos, output, aliases, ensureUniqueCols); + for (org.apache.commons.lang3.tuple.Pair p : colList) { + exprList.add(RexNodeTypeCheck.toExprNode(p.getLeft(), p.getRight(), 0, cluster.getRexBuilder())); + } + return i; + } + private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, String outputTableAlias, ArrayList colAliases, QB qb, List selectColLst, RowResolver selectRR, RelNode input) throws SemanticException { @@ -4929,29 +4910,18 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases); - // Use the RowResolver from the input operator to generate a input - // ObjectInspector that can be used to initialize the UDTF. 
Then, the - // resulting output object inspector can be used to make the RowResolver - // for the UDTF operator - List inputCols = selectRR.getColumnInfos(); - - // Create the object inspector for the input columns and initialize the + // Create the return type info for the input columns and initialize the // UDTF - List colNames = new ArrayList(); - ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()]; - for (int i = 0; i < inputCols.size(); i++) { - colNames.add(inputCols.get(i).getInternalName()); - colOIs[i] = inputCols.get(i).getObjectInspector(); - } - StandardStructObjectInspector rowOI = ObjectInspectorFactory - .getStandardStructObjectInspector(colNames, Arrays.asList(colOIs)); - StructObjectInspector outputOI = genericUDTF.initialize(rowOI); + StructTypeInfo type = (StructTypeInfo) TypeConverter.convert( + functionHelper.getReturnType( + functionHelper.getFunctionInfo(genericUDTFName), + selectColLst)); - int numUdtfCols = outputOI.getAllStructFieldRefs().size(); + int numUdtfCols = type.getAllStructFieldNames().size(); if (colAliases.isEmpty()) { // user did not specfied alias names, infer names from outputOI - for (StructField field : outputOI.getAllStructFieldRefs()) { - colAliases.add(field.getFieldName()); + for (String fieldName : type.getAllStructFieldNames()) { + colAliases.add(fieldName); } } // Make sure that the number of column aliases in the AS clause matches @@ -4966,7 +4936,9 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str List udtfCols = new ArrayList(); Iterator colAliasesIter = colAliases.iterator(); - for (StructField sf : outputOI.getAllStructFieldRefs()) { + for (int i = 0; i < type.getAllStructFieldTypeInfos().size(); i++) { + final String fieldName = type.getAllStructFieldNames().get(i); + final TypeInfo fieldTypeInfo = type.getAllStructFieldTypeInfos().get(i); String colAlias = colAliasesIter.next(); assert (colAlias != null); @@ -4974,23 +4946,21 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str // Since the UDTF operator feeds into a LVJ operator that will rename // all the internal names, we can just use field name from the UDTF's OI // as the internal name - ColumnInfo col = new ColumnInfo(sf.getFieldName(), - TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), - outputTableAlias, false); + ColumnInfo col = new ColumnInfo(fieldName, fieldTypeInfo, outputTableAlias, false); udtfCols.add(col); } // Create the row resolver for this operator from the output columns - RowResolver out_rwsch = new RowResolver(); + RowResolver outputRR = new RowResolver(); for (int i = 0; i < udtfCols.size(); i++) { - out_rwsch.put(outputTableAlias, colAliases.get(i), udtfCols.get(i)); + outputRR.put(outputTableAlias, colAliases.get(i), udtfCols.get(i)); } // Add the UDTFOperator to the operator DAG RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster); // Build row type from field - RelDataType retType = TypeConverter.getType(cluster, out_rwsch, null); + RelDataType retType = TypeConverter.getType(cluster, outputRR, null); Builder argTypeBldr = ImmutableList. 
builder(); @@ -5013,8 +4983,8 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str RelNode udtf = HiveTableFunctionScan.create(cluster, traitSet, list, rexNode, null, retType, null); // Add new rel & its RR to the maps - relToHiveColNameCalcitePosMap.put(udtf, buildHiveToCalciteColumnMap(out_rwsch)); - relToHiveRR.put(udtf, out_rwsch); + relToHiveColNameCalcitePosMap.put(udtf, buildHiveToCalciteColumnMap(outputRR)); + relToHiveRR.put(udtf, outputRR); return udtf; } @@ -5291,18 +5261,15 @@ public Object post(Object t) { } private ImmutableMap buildHiveColNameToInputPosMap( - List col_list, RowResolver inputRR) { - // Build a map of Hive column Names (ExprNodeColumnDesc Name) - // to the positions of those projections in the input - Multimap hashCodeTocolumnDescMap = ArrayListMultimap.create(); - ExprNodeDescUtils.getExprNodeColumnDesc(col_list, hashCodeTocolumnDescMap); - ImmutableMap.Builder hiveColNameToInputPosMapBuilder = new ImmutableMap.Builder(); - String exprNodecolName; - for (ExprNodeColumnDesc exprDesc : hashCodeTocolumnDescMap.values()) { - exprNodecolName = exprDesc.getColumn(); - hiveColNameToInputPosMapBuilder.put(exprNodecolName, inputRR.getPosition(exprNodecolName)); + List columnList, RowResolver inputRR) { + final ImmutableBitSet refs = + RelOptUtil.InputFinder.bits(columnList, null); + ImmutableMap.Builder hiveColNameToInputPosMapBuilder = + new ImmutableMap.Builder<>(); + for (int ref : refs) { + hiveColNameToInputPosMapBuilder.put( + inputRR.getColumnInfos().get(ref).getInternalName(), ref); } - return hiveColNameToInputPosMapBuilder.build(); } @@ -5368,6 +5335,216 @@ protected Table getTableObjectByName(String tabName, boolean throwException) thr return tabNameToTabObject.get(fullyQualName); } + RexNode genRexNode(ASTNode expr, RowResolver input, + RowResolver outerRR, Map subqueryToRelNode, + boolean useCaching, RexBuilder rexBuilder) throws SemanticException { + TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder, useCaching, false); + tcCtx.setOuterRR(outerRR); + tcCtx.setSubqueryToRelNode(subqueryToRelNode); + return genRexNode(expr, input, tcCtx); + } + + /** + * Generates a Calcite {@link RexNode} for the expression with TypeCheckCtx. + */ + RexNode genRexNode(ASTNode expr, RowResolver input, RexBuilder rexBuilder) + throws SemanticException { + // Since the user didn't supply a customized type-checking context, + // use default settings. + return genRexNode(expr, input, true, false, rexBuilder); + } + + RexNode genRexNode(ASTNode expr, RowResolver input, boolean useCaching, + boolean foldExpr, RexBuilder rexBuilder) throws SemanticException { + TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder, useCaching, foldExpr); + return genRexNode(expr, input, tcCtx); + } + + /** + * Generates a Calcite {@link RexNode} for the expression and children of it + * with default TypeCheckCtx. + */ + Map genAllRexNode(ASTNode expr, RowResolver input, RexBuilder rexBuilder) + throws SemanticException { + TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder); + return genAllRexNode(expr, input, tcCtx); + } + + /** + * Returns a Calcite {@link RexNode} for the expression. + * If it is evaluated already in previous operator, it can be retrieved from cache. 
+ */ + RexNode genRexNode(ASTNode expr, RowResolver input, + TypeCheckCtx tcCtx) throws SemanticException { + RexNode cached = null; + if (tcCtx.isUseCaching()) { + cached = getRexNodeCached(expr, input, tcCtx); + } + if (cached == null) { + Map allExprs = genAllRexNode(expr, input, tcCtx); + return allExprs.get(expr); + } + return cached; + } + + /** + * Find RexNode for the expression cached in the RowResolver. Returns null if not exists. + */ + private RexNode getRexNodeCached(ASTNode node, RowResolver input, + TypeCheckCtx tcCtx) throws SemanticException { + ColumnInfo colInfo = input.getExpression(node); + if (colInfo != null) { + ASTNode source = input.getExpressionSource(node); + if (source != null) { + unparseTranslator.addCopyTranslation(node, source); + } + return RexNodeTypeCheck.toExprNode(colInfo, input, 0, tcCtx.getRexBuilder()); + } + return null; + } + + /** + * Generates all of the Calcite {@link RexNode}s for the expression and children of it + * passed in the arguments. This function uses the row resolver and the metadata information + * that are passed as arguments to resolve the column names to internal names. + * + * @param expr + * The expression + * @param input + * The row resolver + * @param tcCtx + * Customized type-checking context + * @return expression to exprNodeDesc mapping + * @throws SemanticException Failed to evaluate expression + */ + Map genAllRexNode(ASTNode expr, RowResolver input, + TypeCheckCtx tcCtx) throws SemanticException { + // Create the walker and the rules dispatcher. + tcCtx.setUnparseTranslator(unparseTranslator); + + Map nodeOutputs = + RexNodeTypeCheck.genExprNode(expr, tcCtx); + RexNode desc = nodeOutputs.get(expr); + if (desc == null) { + String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr + .getChild(0).getText()); + ColumnInfo colInfo = input.get(null, tableOrCol); + String errMsg; + if (colInfo == null && input.getIsExprResolver()){ + errMsg = ASTErrorUtils.getMsg( + ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(), expr); + } else { + errMsg = tcCtx.getError(); + } + throw new SemanticException(Optional.ofNullable(errMsg).orElse("Error in parsing ")); + } + if (desc instanceof HiveRexExprList) { + throw new SemanticException("TOK_ALLCOLREF is not supported in current context"); + } + + if (!unparseTranslator.isEnabled()) { + // Not creating a view, so no need to track view expansions. + return nodeOutputs; + } + + List fieldDescList = new ArrayList<>(); + + for (Map.Entry entry : nodeOutputs.entrySet()) { + if (!(entry.getValue() instanceof RexInputRef)) { + // we need to translate the RexFieldAccess too, e.g., identifiers in + // struct<>. + if (entry.getValue() instanceof RexFieldAccess) { + fieldDescList.add(entry.getKey()); + } + continue; + } + ASTNode node = entry.getKey(); + RexInputRef columnDesc = (RexInputRef) entry.getValue(); + int index = columnDesc.getIndex(); + String[] tmp; + if (index < input.getColumnInfos().size()) { + ColumnInfo columnInfo = input.getColumnInfos().get(index); + if (columnInfo.getTabAlias() == null + || columnInfo.getTabAlias().length() == 0) { + // These aren't real column refs; instead, they are special + // internal expressions used in the representation of aggregation. + continue; + } + tmp = input.reverseLookup(columnInfo.getInternalName()); + } else { + // in subquery case, tmp may be from outside. 
+ ColumnInfo columnInfo = tcCtx.getOuterRR().getColumnInfos().get( + index - input.getColumnInfos().size()); + if (columnInfo.getTabAlias() == null + || columnInfo.getTabAlias().length() == 0) { + continue; + } + tmp = tcCtx.getOuterRR().reverseLookup(columnInfo.getInternalName()); + } + StringBuilder replacementText = new StringBuilder(); + replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf)); + replacementText.append("."); + replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf)); + unparseTranslator.addTranslation(node, replacementText.toString()); + } + + for (ASTNode node : fieldDescList) { + Map map = translateFieldDesc(node); + for (Entry entry : map.entrySet()) { + unparseTranslator.addTranslation(entry.getKey(), entry.getValue()); + } + } + + return nodeOutputs; + } + + private String recommendName(RexNode exp, String colAlias, RowResolver rowResolver) { + if (!colAlias.startsWith(autogenColAliasPrfxLbl)) { + return null; + } + String column = recommendInputName(exp, rowResolver); + if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) { + return column; + } + return null; + } + + /** + * Recommend name for the expression + */ + private static String recommendInputName(RexNode desc, RowResolver rowResolver) { + Integer pos = null; + if (desc instanceof RexInputRef) { + pos = ((RexInputRef) desc).getIndex(); + } + if (desc.isA(SqlKind.CAST)) { + RexNode input = ((RexCall) desc).operands.get(0); + if (input instanceof RexInputRef) { + pos = ((RexInputRef) input).getIndex(); + } + } + return pos != null ? + rowResolver.getColumnInfos().get(pos).getInternalName() : + null; + } + + + /** + * Contains information useful to decorrelate queries. + */ + protected static class InputContext { + protected final RelDataType inputRowType; + protected final ImmutableBiMap positionToColumnName; + protected final RowResolver inputRowResolver; + + protected InputContext(RelDataType inputRowType, ImmutableMap columnNameToPosition, + RowResolver inputRowResolver) { + this.inputRowType = inputRowType; + this.positionToColumnName = ImmutableBiMap.copyOf(columnNameToPosition).inverse(); + this.inputRowResolver = inputRowResolver.duplicate(); + } + } + /** * This method can be called at startup time to pre-register all the * additional Hive classes (compared to Calcite core classes) that may diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java index ece6e774ad..36c3af5f8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DruidSqlOperatorConverter.java @@ -28,7 +28,6 @@ import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.adapter.druid.ExtractOperatorConversion; import org.apache.calcite.adapter.druid.FloorOperatorConversion; -import org.apache.calcite.adapter.druid.UnarySuffixOperatorConversion; import org.apache.calcite.config.CalciteConnectionConfig; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; @@ -48,7 +47,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFromUnixTimeSqlOperator; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTruncSqlOperator; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnixTimestampSqlOperator; +import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToUnixTimestampSqlOperator; import org.joda.time.Period; import javax.annotation.Nullable; @@ -97,7 +96,7 @@ private DruidSqlOperatorConverter() { druidOperatorMap.put(HiveTruncSqlOperator.INSTANCE, new DruidDateTruncOperatorConversion()); druidOperatorMap.put(HiveToDateSqlOperator.INSTANCE, new DruidToDateOperatorConversion()); druidOperatorMap.put(HiveFromUnixTimeSqlOperator.INSTANCE, new DruidFormUnixTimeOperatorConversion()); - druidOperatorMap.put(HiveUnixTimestampSqlOperator.INSTANCE, new DruidUnixTimestampOperatorConversion()); + druidOperatorMap.put(HiveToUnixTimestampSqlOperator.INSTANCE, new DruidUnixTimestampOperatorConversion()); druidOperatorMap.put(HiveDateAddSqlOperator.INSTANCE, new DruidDateArithmeticOperatorConversion(1, HiveDateAddSqlOperator.INSTANCE) ); @@ -251,7 +250,7 @@ private DruidSqlOperatorConverter() { implements org.apache.calcite.adapter.druid.DruidSqlOperatorConverter { @Override public SqlOperator calciteOperator() { - return HiveUnixTimestampSqlOperator.INSTANCE; + return HiveToUnixTimestampSqlOperator.INSTANCE; } @Nullable @Override public String toDruidExpression(RexNode rexNode, RelDataType rowType, DruidQuery query diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index c87f2d2292..8b37161c52 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -357,7 +357,7 @@ private final GlobalLimitCtx globalLimitCtx; // prefix for column names auto generated by hive - private final String autogenColAliasPrfxLbl; + protected final String autogenColAliasPrfxLbl; private final boolean autogenColAliasPrfxIncludeFuncName; // Keep track of view alias to read entity corresponding to the view @@ -3598,12 +3598,25 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, return output; } + Integer genExprNodeDescRegex(String colRegex, String tabAlias, ASTNode sel, + List exprList, Set excludeCols, RowResolver input, + RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, + boolean ensureUniqueCols) throws SemanticException { + List> colList = new ArrayList<>(); + Integer i = genColListRegex(colRegex, tabAlias, sel, + colList, excludeCols, input, colSrcRR, pos, output, aliases, ensureUniqueCols); + for (Pair p : colList) { + exprList.add(ExprNodeTypeCheck.toExprNode(p.getLeft(), p.getRight())); + } + return i; + } + @SuppressWarnings("nls") - // TODO: make aliases unique, otherwise needless rewriting takes place + // TODO: make aliases unique, otherwise needless rewriting takes place Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, - List colList, Set excludeCols, RowResolver input, - RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, - boolean ensureUniqueCols) throws SemanticException { + List> colList, Set excludeCols, RowResolver input, + RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, + boolean ensureUniqueCols) throws SemanticException { if (colSrcRR == null) { colSrcRR = input; @@ -3671,9 +3684,7 @@ Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, } ColumnInfo oColInfo = inputColsProcessed.get(colInfo); if (oColInfo == null) { - ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name, - colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol()); - colList.add(expr); + 
colList.add(Pair.of(colInfo, colSrcRR)); oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()); inputColsProcessed.put(colInfo, oColInfo); @@ -3760,9 +3771,7 @@ Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, ColumnInfo oColInfo = inputColsProcessed.get(colInfo); if (oColInfo == null) { - ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name, - colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol()); - colList.add(expr); + colList.add(Pair.of(colInfo, input)); oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()); inputColsProcessed.put(colInfo, oColInfo); @@ -4471,7 +4480,7 @@ boolean isRegex(String pattern, HiveConf conf) { .getChild(0)); } if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) { - genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0), + genExprNodeDescRegex(".*", null, (ASTNode) udtfExpr.getChild(0), colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); } } @@ -4579,7 +4588,7 @@ boolean isRegex(String pattern, HiveConf conf) { // The real expression if (expr.getType() == HiveParser.TOK_ALLCOLREF) { int initPos = pos; - pos = genColListRegex(".*", expr.getChildCount() == 0 ? null + pos = genExprNodeDescRegex(".*", expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); if (unparseTranslator.isEnabled()) { @@ -4592,7 +4601,7 @@ boolean isRegex(String pattern, HiveConf conf) { // In case the expression is a regex COL. // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), + pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL @@ -4603,7 +4612,7 @@ boolean isRegex(String pattern, HiveConf conf) { // In case the expression is TABLE.COL (col can be regex). // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()), + pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); } else { @@ -4844,15 +4853,22 @@ boolean autogenColAliasPrfxIncludeFuncName() { * for each GroupBy aggregation. 
*/ public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, - List aggParameters, ASTNode aggTree, - boolean isDistinct, boolean isAllColumns) + List aggParameters, ASTNode aggTree, + boolean isDistinct, boolean isAllColumns) + throws SemanticException { + return getGenericUDAFEvaluator2(aggName, getWritableObjectInspector(aggParameters), + aggTree, isDistinct, isAllColumns); + } + + public static GenericUDAFEvaluator getGenericUDAFEvaluator2(String aggName, + List aggParameterOIs, ASTNode aggTree, + boolean isDistinct, boolean isAllColumns) throws SemanticException { - List originalParameterTypeInfos = getWritableObjectInspector(aggParameters); GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator( - aggName, originalParameterTypeInfos, isDistinct, isAllColumns); + aggName, aggParameterOIs, isDistinct, isAllColumns); if (null == result) { String reason = "Looking for UDAF Evaluator\"" + aggName - + "\" with parameters " + originalParameterTypeInfos; + + "\" with parameters " + aggParameterOIs; throw new SemanticException(ASTErrorUtils.getMsg( ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(), (ASTNode) aggTree.getChild(0), reason)); @@ -4872,7 +4888,16 @@ public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, * when the UDAF is not found or has problems. */ public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, - GenericUDAFEvaluator.Mode emode, List aggParameters) + GenericUDAFEvaluator.Mode emode, List aggParameters) + throws SemanticException { + GenericUDAFInfo udafInfo = getGenericUDAFInfo2( + evaluator, emode, getWritableObjectInspector(aggParameters)); + udafInfo.convertedParameters = aggParameters; + return udafInfo; + } + + public static GenericUDAFInfo getGenericUDAFInfo2(GenericUDAFEvaluator evaluator, + GenericUDAFEvaluator.Mode emode, List aggOIs) throws SemanticException { GenericUDAFInfo r = new GenericUDAFInfo(); @@ -4883,7 +4908,6 @@ public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, // set r.returnType ObjectInspector returnOI = null; try { - List aggOIs = getWritableObjectInspector(aggParameters); ObjectInspector[] aggOIArray = new ObjectInspector[aggOIs.size()]; for (int ii = 0; ii < aggOIs.size(); ++ii) { aggOIArray[ii] = aggOIs.get(ii); @@ -4893,9 +4917,6 @@ public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, } catch (HiveException e) { throw new SemanticException(e); } - // set r.convertedParameters - // TODO: type conversion - r.convertedParameters = aggParameters; return r; } @@ -7292,7 +7313,7 @@ private ExprNodeDesc getNotNullConstraintExpr(Table targetTable, Operator input, continue; } if (nullConstraintBitSet.indexOf(constraintIdx) != -1) { - ExprNodeDesc currExpr = ExprNodeTypeCheck.toExprNodeDesc(colInfos.get(colExprIdx)); + ExprNodeDesc currExpr = ExprNodeTypeCheck.toExprNode(colInfos.get(colExprIdx), null); ExprNodeDesc isNotNullUDF = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() .getFuncExprNodeDesc("isnotnull", currExpr); if (currUDF != null) { @@ -13035,16 +13056,6 @@ public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) return genExprNodeDesc(expr, input, true, false); } - ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, - RowResolver outerRR, Map subqueryToRelNode, - boolean useCaching) throws SemanticException { - - TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, false); - tcCtx.setOuterRR(outerRR); - tcCtx.setSubqueryToRelNode(subqueryToRelNode); - return 
genExprNodeDesc(expr, input, tcCtx); - } - ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, boolean useCaching, boolean foldExpr) throws SemanticException { TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, foldExpr); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprFactory.java index afc2bbbf66..90363984bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprFactory.java @@ -20,10 +20,11 @@ import java.math.BigDecimal; import java.time.ZoneId; import java.util.List; +import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.SubqueryType; @@ -33,8 +34,7 @@ import org.apache.hive.common.util.DateUtils; /** - * Generic expressions factory. Currently, the only implementation produces - * Hive {@link ExprNodeDesc}. + * Generic expressions factory. */ public abstract class ExprFactory { @@ -50,7 +50,8 @@ * Generates an expression from the input column. This may not necessarily * be a column expression, e.g., if the column is a constant. */ - protected abstract T toExpr(ColumnInfo colInfo); + protected abstract T toExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws SemanticException; /* FIELD REFERENCES */ /** @@ -61,12 +62,19 @@ /** * Creates column expression. */ - protected abstract T createColumnRefExpr(ColumnInfo colInfo); + protected abstract T createColumnRefExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws SemanticException; + + /** + * Creates column expression. + */ + protected abstract T createColumnRefExpr(ColumnInfo colInfo, List rowResolverList) + throws SemanticException; /** * Returns column name referenced by a column expression. */ - protected abstract String getColumnName(T expr); + protected abstract String getColumnName(T expr, RowResolver rowResolver); /* CONSTANT EXPRESSIONS */ /** @@ -130,7 +138,7 @@ protected boolean isAllConstants(List exprs) { /** * Creates a double constant expression from input value. */ - protected abstract T createDoubleConstantExpr(String value); + protected abstract T createDoubleConstantExpr(String value) throws SemanticException; /** * Creates a decimal constant expression from input value. @@ -205,7 +213,7 @@ protected boolean isAllConstants(List exprs) { * Default generator for constant expression when type cannot be inferred * from input query. */ - protected T createConstantExpr(String value) { + protected T createConstantExpr(String value) throws SemanticException { // The expression can be any one of Double, Long and Integer. We // try to parse the expression in that order to ensure that the // most specific type is used for conversion. @@ -227,10 +235,17 @@ protected T createConstantExpr(String value) { return result != null ? result : result2; } + /** + * Creates a struct with given type. + */ + protected abstract T createStructExpr(TypeInfo typeInfo, List operands) + throws SemanticException; + /** * Creates a constant expression from input value with given type. 
*/ - protected abstract T createConstantExpr(TypeInfo typeInfo, Object constantValue); + protected abstract T createConstantExpr(TypeInfo typeInfo, Object constantValue) + throws SemanticException; /** * Adjust type of constant value based on input type, e.g., adjust precision and scale @@ -249,12 +264,18 @@ protected abstract Object interpretConstantAsPrimitive(PrimitiveTypeInfo targetT */ protected abstract Object getConstantValue(T expr); + /** + * Returns value stored in a constant expression as String. + */ + protected abstract String getConstantValueAsString(T expr); + /* METHODS FOR NESTED FIELD REFERENCES CREATION */ /** * Creates a reference to a nested field. */ protected abstract T createNestedColumnRefExpr( - TypeInfo typeInfo, T expr, String fieldName, Boolean isList); + TypeInfo typeInfo, T expr, String fieldName, Boolean isList) + throws SemanticException; /* FUNCTIONS */ /** @@ -266,13 +287,13 @@ protected abstract T createNestedColumnRefExpr( * Creates function call expression. */ protected abstract T createFuncCallExpr(TypeInfo typeInfo, GenericUDF genericUDF, - List inputs); + List inputs) throws SemanticException; /** * Creates function call expression. */ protected abstract T createFuncCallExpr(GenericUDF genericUDF, String funcText, - List inputs) throws UDFArgumentException; + List inputs) throws SemanticException; /** * Returns whether the input expression is an OR function call. @@ -290,15 +311,14 @@ protected abstract T createFuncCallExpr(GenericUDF genericUDF, String funcText, protected abstract boolean isPOSITIVEFuncCallExpr(T expr); /** - * Returns whether the input expression is a STRUCT function call. + * Returns whether the input expression is a NEGATIVE function call. */ - protected abstract boolean isSTRUCTFuncCallExpr(T expr); + protected abstract boolean isNEGATIVEFuncCallExpr(T expr); /** - * The method tries to rewrite an IN function call into an OR/AND function call. - * Returns null if the transformation fails. + * Returns whether the input expression is a STRUCT function call. */ - protected abstract List rewriteINIntoORFuncCallExpr(List inOperands) throws SemanticException; + protected abstract boolean isSTRUCTFuncCallExpr(T expr); /** * Returns true if a CASE expression can be converted into a COALESCE function call. @@ -310,7 +330,7 @@ protected abstract T createFuncCallExpr(GenericUDF genericUDF, String funcText, * Creates subquery expression. */ protected abstract T createSubqueryExpr(TypeCheckCtx ctx, ASTNode subqueryOp, SubqueryType subqueryType, - Object[] inputs) throws CalciteSubquerySemanticException; + Object[] inputs) throws SemanticException; /* LIST OF EXPRESSIONS */ /** @@ -324,12 +344,10 @@ protected abstract T createSubqueryExpr(TypeCheckCtx ctx, ASTNode subqueryOp, Su protected abstract T createExprsListExpr(); /** - * Adds expression to list of expressions and returns resulting - * list. - * If column list is mutable, it will not create a copy - * of the input list. + * Adds expression to list of expressions (list needs to be + * mutable). */ - protected abstract T addExprToExprsList(T columnList, T expr); + protected abstract void addExprToExprsList(T columnList, T expr); /* TYPE SYSTEM */ /** @@ -337,13 +355,18 @@ protected abstract T createSubqueryExpr(TypeCheckCtx ctx, ASTNode subqueryOp, Su */ protected abstract TypeInfo getTypeInfo(T expr); + /** + * Returns the list of types in the input struct expression. 
+ */ + protected abstract List getStructTypeInfoList(T expr); + /** * Changes the type of the input expression to the input type and * returns resulting expression. * If the input expression is mutable, it will not create a copy * of the expression. */ - protected abstract T setTypeInfo(T expr, TypeInfo type); + protected abstract T setTypeInfo(T expr, TypeInfo type) throws SemanticException; /* MISC */ /** @@ -358,4 +381,9 @@ protected abstract T createSubqueryExpr(TypeCheckCtx ctx, ASTNode subqueryOp, Su */ protected abstract List getExprChildren(T expr); + /** + * Returns the list of names in the input struct expression. + */ + protected abstract List getStructNameList(T expr); + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java index 16d354667b..06d30f3768 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java @@ -23,6 +23,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import org.apache.calcite.rel.RelNode; import org.apache.commons.lang3.math.NumberUtils; import org.apache.hadoop.hive.common.type.Date; @@ -36,12 +37,14 @@ import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; @@ -53,6 +56,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; import org.apache.hadoop.hive.ql.plan.SubqueryType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; @@ -67,6 +71,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -94,7 +99,8 @@ protected boolean isExprInstance(Object o) { * {@inheritDoc} */ @Override - protected ExprNodeDesc toExpr(ColumnInfo colInfo) { + protected ExprNodeDesc toExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws SemanticException { ObjectInspector inspector = colInfo.getObjectInspector(); if (inspector instanceof ConstantObjectInspector && inspector instanceof PrimitiveObjectInspector) { return toPrimitiveConstDesc(colInfo, inspector); 
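The RowResolver and offset parameters threaded through toExpr/createColumnRefExpr exist so that a factory can resolve a column to a positional reference instead of a name-based one. A minimal sketch of that lookup, mirroring the RexNodeExprFactory code added later in this patch; the helper name is illustrative only and not part of the change:

// Illustrative only: RowResolver position + offset -> positional input reference.
RexNode columnToInputRef(ColumnInfo colInfo, RowResolver rowResolver, int offset,
    RexBuilder rexBuilder) throws SemanticException {
  // Position of the column within the row produced by the input operator
  int index = rowResolver.getPosition(colInfo.getInternalName());
  if (index < 0) {
    throw new SemanticException("Cannot find column " + colInfo.getInternalName());
  }
  // The offset shifts the index when several inputs (e.g. both sides of a join)
  // are concatenated into a single row type.
  return rexBuilder.makeInputRef(
      TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), index + offset);
}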
@@ -188,7 +194,15 @@ private static ExprNodeConstantDesc toStructConstDesc(ColumnInfo colInfo, Object * {@inheritDoc} */ @Override - protected ExprNodeColumnDesc createColumnRefExpr(ColumnInfo colInfo) { + protected ExprNodeColumnDesc createColumnRefExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) { + return new ExprNodeColumnDesc(colInfo); + } + + /** + * {@inheritDoc} + */ + @Override + protected ExprNodeColumnDesc createColumnRefExpr(ColumnInfo colInfo, List rowResolverList) { return new ExprNodeColumnDesc(colInfo); } @@ -345,7 +359,7 @@ protected Object interpretConstantAsPrimitive(PrimitiveTypeInfo targetType, Obje String constTypeInfoName = sourceType.getTypeName(); if (constTypeInfoName.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { // because a comparison against a "string" will happen in "string" type. - // to avoid unintnetional comparisions in "string" + // to avoid unintentional comparisons in "string" // constants which are representing char/varchar values must be converted to the // appropriate type. if (targetType instanceof CharTypeInfo) { @@ -493,6 +507,25 @@ protected ExprNodeConstantDesc createIntervalSecondConstantExpr(String value) { bdNanos.multiply(NANOS_PER_SEC_BD).intValue())); } + /** + * {@inheritDoc} + */ + @Override + protected ExprNodeDesc createStructExpr(TypeInfo typeInfo, List operands) + throws SemanticException { + assert typeInfo instanceof StructTypeInfo; + if (isAllConstants(operands)) { + return createConstantExpr(typeInfo, + operands.stream() + .map(this::getConstantValue) + .collect(Collectors.toList())); + } + return ExprNodeGenericFuncDesc.newInstance( + new GenericUDFStruct(), + GenericUDFStruct.class.getAnnotation(Description.class).name(), + operands); + } + /** * {@inheritDoc} */ @@ -540,10 +573,9 @@ protected ExprNodeColumnListDesc createExprsListExpr() { * {@inheritDoc} */ @Override - protected ExprNodeColumnListDesc addExprToExprsList(ExprNodeDesc columnList, ExprNodeDesc expr) { + protected void addExprToExprsList(ExprNodeDesc columnList, ExprNodeDesc expr) { ExprNodeColumnListDesc l = (ExprNodeColumnListDesc) columnList; l.addColumn(expr); - return l; } /** @@ -570,6 +602,14 @@ protected Object getConstantValue(ExprNodeDesc expr) { return ((ExprNodeConstantDesc) expr).getValue(); } + /** + * {@inheritDoc} + */ + @Override + protected String getConstantValueAsString(ExprNodeDesc expr) { + return ((ExprNodeConstantDesc) expr).getValue().toString(); + } + /** * {@inheritDoc} */ @@ -582,7 +622,7 @@ protected boolean isColumnRefExpr(Object o) { * {@inheritDoc} */ @Override - protected String getColumnName(ExprNodeDesc expr) { + protected String getColumnName(ExprNodeDesc expr, RowResolver rowResolver) { return ((ExprNodeColumnDesc) expr).getColumn(); } @@ -614,8 +654,18 @@ protected TypeInfo getTypeInfo(ExprNodeDesc expr) { * {@inheritDoc} */ @Override - protected List rewriteINIntoORFuncCallExpr(List inOperands) throws SemanticException { - return TypeCheckProcFactoryUtils.rewriteInToOR(inOperands); + protected List getStructTypeInfoList(ExprNodeDesc expr) { + StructTypeInfo structTypeInfo = (StructTypeInfo) expr.getTypeInfo(); + return structTypeInfo.getAllStructFieldTypeInfos(); + } + + /** + * {@inheritDoc} + */ + @Override + protected List getStructNameList(ExprNodeDesc expr) { + StructTypeInfo structTypeInfo = (StructTypeInfo) expr.getTypeInfo(); + return structTypeInfo.getAllStructFieldNames(); } /** @@ -642,6 +692,14 @@ protected boolean isPOSITIVEFuncCallExpr(ExprNodeDesc expr) { return 
FunctionRegistry.isOpPositive(expr); } + /** + * {@inheritDoc} + */ + @Override + protected boolean isNEGATIVEFuncCallExpr(ExprNodeDesc expr) { + return FunctionRegistry.isOpNegative(expr); + } + /** * {@inheritDoc} */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java index 623b280437..3e3d331412 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java @@ -21,6 +21,7 @@ import java.util.Map; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -68,8 +69,10 @@ private ExprNodeTypeCheck() { /** * Transforms column information into the corresponding Hive ExprNode. */ - public static ExprNodeDesc toExprNodeDesc(ColumnInfo columnInfo) { + public static ExprNodeDesc toExprNode(ColumnInfo columnInfo, RowResolver rowResolver) + throws SemanticException { ExprNodeDescExprFactory factory = new ExprNodeDescExprFactory(); - return factory.toExpr(columnInfo); + return factory.toExpr(columnInfo, rowResolver, 0); } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/FunctionHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/FunctionHelper.java new file mode 100644 index 0000000000..944be83432 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/FunctionHelper.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse.type; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * Interface to handle function information while generating + * Calcite {@link RexNode}. + */ +public interface FunctionHelper { + + /** + * Returns function information based on function text. + */ + FunctionInfo getFunctionInfo(String functionText) throws SemanticException; + + /** + * Given function information and its inputs, it returns + * the type of the output of the function. + */ + RelDataType getReturnType(FunctionInfo functionInfo, List inputs) + throws SemanticException; + + /** + * Given function information, the inputs to that function, and the + * expected return type, it will return the list of inputs with any + * necessary adjustments, e.g., casting of expressions. 
+ */ + List convertInputs(FunctionInfo functionInfo, List inputs, + RelDataType returnType) + throws SemanticException; + + /** + * Given function information and text, inputs to a function, and the + * expected return type, it will return an expression node containing + * the function call. + */ + RexNode getExpression(String functionText, FunctionInfo functionInfo, + List inputs, RelDataType returnType) + throws SemanticException; + + /** + * Returns aggregation information based on given parameters. + */ + AggregateInfo getAggregateFunctionInfo(boolean isDistinct, boolean isAllColumns, + String aggregateName, List aggregateParameters) + throws SemanticException; + + /** + * Returns aggregation information for analytical function based on given parameters. + */ + AggregateInfo getWindowAggregateFunctionInfo(boolean isDistinct, boolean isAllColumns, + String aggregateName, List aggregateParameters) + throws SemanticException; + + /** + * Folds expression according to function semantics. + */ + default RexNode foldExpression(RexNode expr) { + return expr; + } + + /** + * Class to store aggregate function related information. + */ + class AggregateInfo { + private final List parameters; + private final TypeInfo returnType; + private final String aggregateName; + private final boolean distinct; + + public AggregateInfo(List parameters, TypeInfo returnType, String aggregateName, + boolean distinct) { + this.parameters = ImmutableList.copyOf(parameters); + this.returnType = returnType; + this.aggregateName = aggregateName; + this.distinct = distinct; + } + + public List getParameters() { + return parameters; + } + + public TypeInfo getReturnType() { + return returnType; + } + + public String getAggregateName() { + return aggregateName; + } + + public boolean isDistinct() { + return distinct; + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java new file mode 100644 index 0000000000..492cdb6717 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java @@ -0,0 +1,550 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse.type; + +import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitor; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlBinaryOperator; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.util.Util; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToUnixTimestampSqlOperator; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnixTimestampSqlOperator; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Function helper for Hive. + */ +public class HiveFunctionHelper implements FunctionHelper { + + private static final Logger LOG = LoggerFactory.getLogger(HiveFunctionHelper.class); + + private final RexBuilder rexBuilder; + private final int maxNodesForInToOrTransformation; + + public HiveFunctionHelper(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + try { + this.maxNodesForInToOrTransformation = HiveConf.getIntVar( + Hive.get().getConf(), HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES); + } catch (HiveException e) { + throw new IllegalStateException(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public FunctionInfo getFunctionInfo(String functionText) + throws SemanticException { + return FunctionRegistry.getFunctionInfo(functionText); + } + + /** + * {@inheritDoc} + */ + @Override + public RelDataType getReturnType(FunctionInfo fi, List inputs) + throws SemanticException { + // 1) Gather inputs + ObjectInspector[] inputsOIs = new ObjectInspector[inputs.size()]; + for (int i = 0; i < inputsOIs.length; i++) { + inputsOIs[i] = createObjectInspector(inputs.get(i)); + } + // 2) Initialize and obtain return type + ObjectInspector oi = fi.getGenericUDF() != null ? 
+ fi.getGenericUDF().initializeAndFoldConstants(inputsOIs) : + fi.getGenericUDTF().initialize(inputsOIs); + // 3) Convert to RelDataType + return TypeConverter.convert( + TypeInfoUtils.getTypeInfoFromObjectInspector(oi), rexBuilder.getTypeFactory()); + } + + /** + * {@inheritDoc} + */ + @Override + public List convertInputs(FunctionInfo fi, List inputs, + RelDataType returnType) + throws SemanticException { + // 1) Obtain UDF + final GenericUDF genericUDF = fi.getGenericUDF(); + final TypeInfo typeInfo = TypeConverter.convert(returnType); + TypeInfo targetType = null; + + boolean isNumeric = genericUDF instanceof GenericUDFBaseBinary + && typeInfo.getCategory() == Category.PRIMITIVE + && PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); + boolean isCompare = !isNumeric && genericUDF instanceof GenericUDFBaseCompare; + boolean isBetween = !isNumeric && genericUDF instanceof GenericUDFBetween; + boolean isIN = !isNumeric && genericUDF instanceof GenericUDFIn; + + if (isNumeric) { + targetType = typeInfo; + } else if (genericUDF instanceof GenericUDFBaseCompare) { + targetType = FunctionRegistry.getCommonClassForComparison( + TypeConverter.convert(inputs.get(0).getType()), + TypeConverter.convert(inputs.get(1).getType())); + } else if (genericUDF instanceof GenericUDFBetween) { + assert inputs.size() == 4; + // We skip the first child as it is not involved (it is the revert boolean) + // The target type needs to account for all 3 operands + targetType = FunctionRegistry.getCommonClassForComparison( + TypeConverter.convert(inputs.get(1).getType()), + FunctionRegistry.getCommonClassForComparison( + TypeConverter.convert(inputs.get(2).getType()), + TypeConverter.convert(inputs.get(3).getType()))); + } else if (genericUDF instanceof GenericUDFIn) { + // We're only considering the first element of the IN list for the type + assert inputs.size() > 1; + targetType = FunctionRegistry.getCommonClassForComparison( + TypeConverter.convert(inputs.get(0).getType()), + TypeConverter.convert(inputs.get(1).getType())); + } + + if (targetType != null) { + List newInputs = new ArrayList<>(); + // Convert inputs if needed + for (int i = 0; i < inputs.size(); ++i) { + RexNode input = inputs.get(i); + TypeInfo inputTypeInfo = TypeConverter.convert(input.getType()); + RexNode tmpExprNode = input; + if (TypeInfoUtils.isConversionRequiredForComparison(targetType, inputTypeInfo)) { + if (isIN || isCompare) { + // For IN and compare, we will convert requisite children + tmpExprNode = convert(targetType, input); + } else if (isBetween) { + // For BETWEEN skip the first child (the revert boolean) + if (i > 0) { + tmpExprNode = convert(targetType, input); + } + } else if (isNumeric) { + // For numeric, we'll do minimum necessary cast - if we cast to the type + // of expression, bad things will happen.
+ PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(inputTypeInfo, targetType); + tmpExprNode = convert(minArgType, input); + } else { + throw new AssertionError("Unexpected " + targetType + " - not a numeric op or compare"); + } + } + + newInputs.add(tmpExprNode); + } + return newInputs; + } + return inputs; + } + + private RexNode convert(TypeInfo targetType, RexNode input) throws SemanticException { + if (targetType.getCategory() == Category.PRIMITIVE) { + return RexNodeTypeCheck.getExprNodeDefaultExprProcessor(rexBuilder) + .createConversionCast(input, (PrimitiveTypeInfo) targetType); + } else { + StructTypeInfo structTypeInfo = (StructTypeInfo) targetType; // struct + RexCall call = (RexCall) input; + List exprNodes = new ArrayList<>(); + for (int j = 0; j < structTypeInfo.getAllStructFieldTypeInfos().size(); j++) { + exprNodes.add( + convert( + structTypeInfo.getAllStructFieldTypeInfos().get(j), call.getOperands().get(j))); + } + return rexBuilder.makeCall(SqlStdOperatorTable.ROW, exprNodes); + } + } + + /** + * {@inheritDoc} + */ + @Override + public RexNode getExpression(String functionText, FunctionInfo fi, + List inputs, RelDataType returnType) + throws SemanticException { + // See if this is an explicit cast. + RexNode expr = RexNodeConverter.handleExplicitCast( + fi.getGenericUDF(), returnType, inputs, rexBuilder); + + if (expr == null) { + // This is not a cast; process the function. + ImmutableList.Builder argsTypes = ImmutableList.builder(); + for (RexNode input : inputs) { + argsTypes.add(input.getType()); + } + SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(functionText, + fi.getGenericUDF(), argsTypes.build(), returnType); + if (calciteOp.getKind() == SqlKind.CASE) { + // If it is a case operator, we need to rewrite it + inputs = RexNodeConverter.rewriteCaseChildren(functionText, inputs, rexBuilder); + // Adjust branch types by inserting explicit casts if the actual is ambiguous + inputs = RexNodeConverter.adjustCaseBranchTypes(inputs, returnType, rexBuilder); + checkForStatefulFunctions(inputs); + } else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) { + // If it is an extract operator, we need to rewrite it + inputs = RexNodeConverter.rewriteExtractDateChildren(calciteOp, inputs, rexBuilder); + } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) { + // If it is a floor operator, we need to rewrite it + inputs = RexNodeConverter.rewriteFloorDateChildren(calciteOp, inputs, rexBuilder); + } else if (calciteOp.getKind() == SqlKind.IN) { + // if it is a single item in an IN clause, transform A IN (B) to A = B + // from IN [A,B] => EQUALS [A,B] + // if it is more than a single item in an IN clause, + // transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]] + // Rewrite to OR is done only if the number of operands is less than + // the threshold configured + boolean rewriteToOr = true; + if(maxNodesForInToOrTransformation != 0) { + if(inputs.size() > maxNodesForInToOrTransformation) { + rewriteToOr = false; + } + } + if(rewriteToOr) { + // If there are non-deterministic functions, we cannot perform this rewriting + List newInputs = HiveCalciteUtil.transformIntoOrAndClause(inputs, rexBuilder); + if (newInputs != null) { + inputs = newInputs; + if (inputs.size() == 1) { + inputs.add(rexBuilder.makeLiteral(false)); + } + calciteOp = SqlStdOperatorTable.OR; + } + } + } else if (calciteOp.getKind() == SqlKind.COALESCE && + inputs.size() > 1) { + // Rewrite COALESCE as a CASE + // This allows it to be further reduced to OR, if possible
+ calciteOp = SqlStdOperatorTable.CASE; + inputs = RexNodeConverter.rewriteCoalesceChildren(inputs, rexBuilder); + // Adjust branch types by inserting explicit casts if the actual is ambiguous + inputs = RexNodeConverter.adjustCaseBranchTypes(inputs, returnType, rexBuilder); + checkForStatefulFunctions(inputs); + } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { + inputs = RexNodeConverter.rewriteToDateChildren(inputs, rexBuilder); + } else if (calciteOp.getKind() == SqlKind.BETWEEN) { + assert inputs.get(0).isAlwaysTrue() || inputs.get(0).isAlwaysFalse(); + boolean invert = inputs.get(0).isAlwaysTrue(); + SqlBinaryOperator cmpOp; + if (invert) { + calciteOp = SqlStdOperatorTable.OR; + cmpOp = SqlStdOperatorTable.GREATER_THAN; + } else { + calciteOp = SqlStdOperatorTable.AND; + cmpOp = SqlStdOperatorTable.LESS_THAN_OR_EQUAL; + } + RexNode op = inputs.get(1); + RexNode rangeL = inputs.get(2); + RexNode rangeH = inputs.get(3); + inputs = new ArrayList<>(); + inputs.add(rexBuilder.makeCall(cmpOp, rangeL, op)); + inputs.add(rexBuilder.makeCall(cmpOp, op, rangeH)); + } else if (calciteOp == HiveUnixTimestampSqlOperator.INSTANCE && + inputs.size() > 0) { + // unix_timestamp(args) -> to_unix_timestamp(args) + calciteOp = HiveToUnixTimestampSqlOperator.INSTANCE; + } + expr = rexBuilder.makeCall(returnType, calciteOp, inputs); + } + + if (expr instanceof RexCall && !expr.isA(SqlKind.CAST)) { + RexCall call = (RexCall) expr; + expr = rexBuilder.makeCall(returnType, call.getOperator(), + RexUtil.flatten(call.getOperands(), call.getOperator())); + } + + return expr; + } + + private void checkForStatefulFunctions(List exprs) + throws SemanticException { + RexVisitor visitor = new RexVisitorImpl(true) { + @Override + public Void visitCall(final RexCall call) { + // TODO: We should be able to annotate functions in Calcite as stateful + // so we do not have to map back and forth to Hive functions when we are + // doing this check. + GenericUDF nodeUDF; + try { + nodeUDF = getFunctionInfo(call.getOperator().getName()).getGenericUDF(); + } catch (SemanticException e) { + throw new AssertionError("Cannot retrieve function " + call.getOperator().getName() + + " within StatefulFunctionsChecker"); + } + // Stateful?
+ if (FunctionRegistry.isStateful(nodeUDF)) { + throw new Util.FoundOne(call); + } + return super.visitCall(call); + } + }; + + try { + for (RexNode expr : exprs) { + expr.accept(visitor); + } + } catch (Util.FoundOne e) { + throw new SemanticException("Stateful expressions cannot be used inside of CASE"); + } + } + + + /** + * {@inheritDoc} + */ + @Override + public AggregateInfo getAggregateFunctionInfo(boolean isDistinct, boolean isAllColumns, + String aggregateName, List aggregateParameters) + throws SemanticException { + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode( + GroupByDesc.Mode.COMPLETE, isDistinct); + List aggParameterOIs = new ArrayList<>(); + for (RexNode aggParameter : aggregateParameters) { + aggParameterOIs.add(createObjectInspector(aggParameter)); + } + GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator2( + aggregateName, aggParameterOIs, null, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo2( + genericUDAFEvaluator, udafMode, aggParameterOIs); + return new AggregateInfo(aggregateParameters, udaf.returnType, aggregateName, isDistinct); + } + + /** + * {@inheritDoc} + */ + @Override + public AggregateInfo getWindowAggregateFunctionInfo(boolean isDistinct, boolean isAllColumns, + String aggregateName, List aggregateParameters) + throws SemanticException { + TypeInfo returnType = null; + + if (FunctionRegistry.isRankingFunction(aggregateName)) { + // Rank functions type is 'int'/'double' + if (aggregateName.equalsIgnoreCase("percent_rank")) { + returnType = TypeInfoFactory.doubleTypeInfo; + } else { + returnType = TypeInfoFactory.intTypeInfo; + } + } else { + // Try obtaining UDAF evaluators to determine the ret type + try { + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode( + GroupByDesc.Mode.COMPLETE, isDistinct); + List aggParameterOIs = new ArrayList<>(); + for (RexNode aggParameter : aggregateParameters) { + aggParameterOIs.add(createObjectInspector(aggParameter)); + } + if (aggregateName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) + || aggregateName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { + GenericUDAFEvaluator genericUDAFEvaluator = FunctionRegistry.getGenericWindowingEvaluator(aggregateName, + aggParameterOIs, isDistinct, isAllColumns); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo2( + genericUDAFEvaluator, udafMode, aggParameterOIs); + returnType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo(); + } else { + GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator2( + aggregateName, aggParameterOIs, null, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo2( + genericUDAFEvaluator, udafMode, aggParameterOIs); + if (FunctionRegistry.pivotResult(aggregateName)) { + returnType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo(); + } else { + returnType = udaf.returnType; + } + } + } catch (Exception e) { + LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggregateName + + ", trying to translate to GenericUDF"); + } + } + + return returnType != null ? 
+ new AggregateInfo(aggregateParameters, returnType, aggregateName, isDistinct) : null; + } + + private ObjectInspector createObjectInspector(RexNode expr) { + ObjectInspector oi = createConstantObjectInspector(expr); + if (oi != null) { + return oi; + } + return TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + TypeConverter.convert(expr.getType())); + } + + /** + * Returns constant object inspector or null if it could not be generated. + */ + private ConstantObjectInspector createConstantObjectInspector(RexNode expr) { + if (RexUtil.isLiteral(expr, true)) { // Literal or cast on literal + final ExprNodeConstantDesc constant; + if (expr.isA(SqlKind.LITERAL)) { + constant = ExprNodeConverter.toExprNodeConstantDesc((RexLiteral) expr); + } else { + RexNode foldedExpr = foldExpression(expr); + if (!foldedExpr.isA(SqlKind.LITERAL)) { + // Constant could not be generated + return null; + } + constant = ExprNodeConverter.toExprNodeConstantDesc((RexLiteral) foldedExpr); + } + PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) constant.getTypeInfo(); + Object value = constant.getValue(); + Object writableValue = value == null ? null : + PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(typeInfo) + .getPrimitiveWritableObject(value); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + typeInfo, writableValue); + } else if (expr instanceof RexCall) { + RexCall call = (RexCall) expr; + if (call.getOperator() == SqlStdOperatorTable.ROW) { // Struct + List fieldNames = new ArrayList<>(); + List fieldObjectInspectors = new ArrayList<>(); + List writableValues = new ArrayList<>(); + for (int i = 0; i < call.getOperands().size(); i++) { + RexNode input = call.getOperands().get(i); + ConstantObjectInspector objectInspector = createConstantObjectInspector(input); + if (objectInspector == null) { + // Constant could not be generated + return null; + } + fieldNames.add(expr.getType().getFieldList().get(i).getName()); + fieldObjectInspectors.add(objectInspector); + writableValues.add(objectInspector.getWritableConstantValue()); + } + return ObjectInspectorFactory.getStandardConstantStructObjectInspector( + fieldNames, + fieldObjectInspectors, + writableValues); + } else if (call.getOperator() == SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR) { // List + ListTypeInfo listTypeInfo = (ListTypeInfo) TypeConverter.convert(expr.getType()); + TypeInfo typeInfo = listTypeInfo.getListElementTypeInfo(); + List writableValues = new ArrayList<>(); + for (RexNode input : call.getOperands()) { + ConstantObjectInspector objectInspector = createConstantObjectInspector(input); + if (objectInspector == null) { + // Constant could not be generated + return null; + } + writableValues.add(objectInspector.getWritableConstantValue()); + } + return ObjectInspectorFactory.getStandardConstantListObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector( + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo), + ObjectInspectorCopyOption.WRITABLE), + writableValues); + } else if (call.getOperator() == SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR) { // Map + MapTypeInfo mapTypeInfo = (MapTypeInfo) TypeConverter.convert(expr.getType()); + Map writableValues = new HashMap<>(); + Iterator it = call.getOperands().iterator(); + while (it.hasNext()) { + ConstantObjectInspector keyObjectInspector = createConstantObjectInspector(it.next()); + if (keyObjectInspector == null) { + // Constant could not be generated + return null; + } + ConstantObjectInspector 
valueObjectInspector = createConstantObjectInspector(it.next()); + if (valueObjectInspector == null) { + // Constant could not be generated + return null; + } + writableValues.put( + keyObjectInspector.getWritableConstantValue(), + valueObjectInspector.getWritableConstantValue()); + } + return ObjectInspectorFactory.getStandardConstantMapObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector( + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo()), + ObjectInspectorCopyOption.WRITABLE), + ObjectInspectorUtils.getStandardObjectInspector( + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo()), + ObjectInspectorCopyOption.WRITABLE), + writableValues); + } + } + // Constant could not be generated + return null; + } + + /** + * {@inheritDoc} + */ + @Override + public RexNode foldExpression(RexNode expr) { + HiveRexExecutorImpl executor = new HiveRexExecutorImpl(); + List result = new ArrayList<>(); + executor.reduce(rexBuilder, ImmutableList.of(expr), result); + return result.get(0); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/JoinCondTypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/JoinCondTypeCheckProcFactory.java index 319bae6dbe..309a34ef88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/JoinCondTypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/JoinCondTypeCheckProcFactory.java @@ -86,7 +86,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (!qualifiedAccess) { colInfo = getColInfo(ctx, null, tableOrCol, expr); // It's a column. - return exprFactory.createColumnRefExpr(colInfo); + return exprFactory.createColumnRefExpr(colInfo, ctx.getInputRRList()); } else if (hasTableAlias(ctx, tableOrCol, expr)) { return null; } else { @@ -160,12 +160,13 @@ protected ColumnExprProcessor getColumnExprProcessor() { @Override protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, Object... nodeOutputs) throws SemanticException { + JoinTypeCheckCtx jctx = (JoinTypeCheckCtx) ctx; String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) .getText()); // NOTE: tableAlias must be a valid non-ambiguous table alias, // because we've checked that in TOK_TABLE_OR_COL's process method. ColumnInfo colInfo = getColInfo((JoinTypeCheckCtx) ctx, tableAlias, - exprFactory.getConstantValue((T) nodeOutputs[1]).toString(), expr); + exprFactory.getConstantValueAsString((T) nodeOutputs[1]), expr); if (colInfo == null) { ctx.setError(ASTErrorUtils.getMsg( @@ -174,7 +175,7 @@ protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, } ColumnInfo newColumnInfo = new ColumnInfo(colInfo); newColumnInfo.setTabAlias(tableAlias); - return exprFactory.createColumnRefExpr(newColumnInfo); + return exprFactory.createColumnRefExpr(newColumnInfo, jctx.getInputRRList()); } private ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java new file mode 100644 index 0000000000..8bbb19f202 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java @@ -0,0 +1,1021 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse.type; + +import com.google.common.collect.ImmutableList; +import java.math.BigDecimal; +import java.time.Instant; +import java.time.ZoneId; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.apache.calcite.avatica.util.TimeUnit; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlCollation; +import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlQuantifyOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.ConversionUtil; +import org.apache.calcite.util.DateString; +import org.apache.calcite.util.NlsString; +import org.apache.calcite.util.TimestampString; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.common.type.TimestampTZ; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRexExprList; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory.HiveNlsString.Interpretation; +import 
org.apache.hadoop.hive.ql.plan.SubqueryType; +import org.apache.hadoop.hive.ql.udf.SettableUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Expression factory for Calcite {@link RexNode}. + */ +public class RexNodeExprFactory extends ExprFactory { + + private static final Logger LOG = LoggerFactory.getLogger(RexNodeExprFactory.class); + + private final RexBuilder rexBuilder; + private final FunctionHelper functionHelper; + + public RexNodeExprFactory(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + this.functionHelper = new HiveFunctionHelper(rexBuilder); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isExprInstance(Object o) { + return o instanceof RexNode; + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode toExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws CalciteSemanticException { + ObjectInspector inspector = colInfo.getObjectInspector(); + if (inspector instanceof ConstantObjectInspector && inspector instanceof PrimitiveObjectInspector) { + return toPrimitiveConstDesc(colInfo, inspector, rexBuilder); + } + int index = rowResolver.getPosition(colInfo.getInternalName()); + if (index < 0) { + throw new CalciteSemanticException("Unexpected error: Cannot find column"); + } + return rexBuilder.makeInputRef( + TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), index + offset); + } + + private static RexNode toPrimitiveConstDesc( + ColumnInfo colInfo, ObjectInspector inspector, RexBuilder rexBuilder) + throws CalciteSemanticException { + Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue(); + return rexBuilder.makeLiteral(constant, + TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createColumnRefExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws CalciteSemanticException { + int index = rowResolver.getPosition(colInfo.getInternalName()); + return rexBuilder.makeInputRef( + TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), index + offset); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createColumnRefExpr(ColumnInfo colInfo, List rowResolverList) + throws SemanticException { + int index = getPosition(colInfo, rowResolverList); + return rexBuilder.makeInputRef( + TypeConverter.convert(colInfo.getType(), 
rexBuilder.getTypeFactory()), index); + } + + private int getPosition(ColumnInfo colInfo, List rowResolverList) + throws SemanticException { + ColumnInfo tmp; + ColumnInfo cInfoToRet = null; + int position = 0; + for (RowResolver rr : rowResolverList) { + tmp = rr.get(colInfo.getTabAlias(), colInfo.getAlias()); + if (tmp != null) { + if (cInfoToRet != null) { + throw new CalciteSemanticException("Could not resolve column name"); + } + cInfoToRet = tmp; + position += rr.getPosition(cInfoToRet.getInternalName()); + } else if (cInfoToRet == null) { + position += rr.getColumnInfos().size(); + } + } + return position; + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createNullConstantExpr() { + return rexBuilder.makeNullLiteral( + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.NULL)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createBooleanConstantExpr(String value) { + Boolean b = value != null ? Boolean.valueOf(value) : null; + return rexBuilder.makeLiteral(b, + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BOOLEAN), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createBigintConstantExpr(String value) { + return rexBuilder.makeLiteral( + new BigDecimal(Long.valueOf(value)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createIntConstantExpr(String value) { + return rexBuilder.makeLiteral( + new BigDecimal(Integer.valueOf(value)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.INTEGER), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createSmallintConstantExpr(String value) { + return rexBuilder.makeLiteral( + new BigDecimal(Short.valueOf(value)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.SMALLINT), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createTinyintConstantExpr(String value) { + return rexBuilder.makeLiteral( + new BigDecimal(Byte.valueOf(value)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.TINYINT), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createFloatConstantExpr(String value) { + Float f = Float.valueOf(value); + return rexBuilder.makeApproxLiteral( + new BigDecimal(Float.toString(f)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.FLOAT)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createDoubleConstantExpr(String value) throws SemanticException { + Double d = Double.valueOf(value); + // TODO: The best solution is to support NaN in expression reduction. + if (Double.isNaN(d)) { + throw new CalciteSemanticException("NaN", UnsupportedFeature.Invalid_decimal); + } + return rexBuilder.makeApproxLiteral( + new BigDecimal(Double.toString(d)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.DOUBLE)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createDecimalConstantExpr(String value, boolean allowNullValueConstantExpr) { + HiveDecimal hd = HiveDecimal.create(value); + if (!allowNullValueConstantExpr && hd == null) { + return null; + } + DecimalTypeInfo type = adjustType(hd); + return rexBuilder.makeExactLiteral( + hd != null ? 
hd.bigDecimalValue() : null, + TypeConverter.convert(type, rexBuilder.getTypeFactory())); + } + + @Override + protected TypeInfo adjustConstantType(PrimitiveTypeInfo targetType, Object constantValue) { + if (constantValue instanceof HiveDecimal) { + return adjustType((HiveDecimal) constantValue); + } + return targetType; + } + + private DecimalTypeInfo adjustType(HiveDecimal hd) { + // Note: the normalize() call with rounding in HiveDecimal will currently reduce the + // precision and scale of the value by throwing away trailing zeroes. This may or may + // not be desirable for the literals; however, this used to be the default behavior + // for explicit decimal literals (e.g. 1.0BD), so we keep this behavior for now. + int prec = 1; + int scale = 0; + if (hd != null) { + prec = hd.precision(); + scale = hd.scale(); + } + DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(prec, scale); + return typeInfo; + } + + /** + * {@inheritDoc} + */ + @Override + protected Object interpretConstantAsPrimitive(PrimitiveTypeInfo targetType, Object constantValue, + PrimitiveTypeInfo sourceType) { + // Extract string value if necessary + Object constantToInterpret = constantValue; + if (constantValue instanceof HiveNlsString) { + constantToInterpret = ((HiveNlsString) constantValue).getValue(); + } + + if (constantToInterpret instanceof Number || constantToInterpret instanceof String) { + try { + PrimitiveTypeEntry primitiveTypeEntry = targetType.getPrimitiveTypeEntry(); + if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(primitiveTypeEntry)) { + return toBigDecimal(constantToInterpret.toString()).intValueExact(); + } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(primitiveTypeEntry)) { + return toBigDecimal(constantToInterpret.toString()).longValueExact(); + } else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) { + return Double.valueOf(constantToInterpret.toString()); + } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) { + return Float.valueOf(constantToInterpret.toString()); + } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(primitiveTypeEntry)) { + return toBigDecimal(constantToInterpret.toString()).byteValueExact(); + } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(primitiveTypeEntry)) { + return toBigDecimal(constantToInterpret.toString()).shortValueExact(); + } else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(primitiveTypeEntry)) { + HiveDecimal decimal = HiveDecimal.create(constantToInterpret.toString()); + return decimal != null ? decimal.bigDecimalValue() : null; + } + } catch (NumberFormatException | ArithmeticException nfe) { + LOG.trace("Failed to narrow type of constant", nfe); + return null; + } + } + + // Comparison of decimal and float/double happens in float/double. + if (constantToInterpret instanceof BigDecimal) { + BigDecimal bigDecimal = (BigDecimal) constantToInterpret; + + PrimitiveTypeEntry primitiveTypeEntry = targetType.getPrimitiveTypeEntry(); + if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) { + return bigDecimal.doubleValue(); + } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) { + return bigDecimal.floatValue(); + } + return bigDecimal; + } + + String constTypeInfoName = sourceType.getTypeName(); + if (constTypeInfoName.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + // because a comparison against a "string" will happen in "string" type.
+ // to avoid unintentional comparisons in "string" + // constants which are representing char/varchar values must be converted to the + // appropriate type. + if (targetType instanceof CharTypeInfo) { + final String constValue = constantToInterpret.toString(); + final int length = TypeInfoUtils.getCharacterLengthForType(targetType); + HiveChar newValue = new HiveChar(constValue, length); + HiveChar maxCharConst = new HiveChar(constValue, HiveChar.MAX_CHAR_LENGTH); + if (maxCharConst.equals(newValue)) { + return makeHiveUnicodeString(Interpretation.CHAR, newValue.getValue()); + } else { + return null; + } + } + if (targetType instanceof VarcharTypeInfo) { + final String constValue = constantToInterpret.toString(); + final int length = TypeInfoUtils.getCharacterLengthForType(targetType); + HiveVarchar newValue = new HiveVarchar(constValue, length); + HiveVarchar maxCharConst = new HiveVarchar(constValue, HiveVarchar.MAX_VARCHAR_LENGTH); + if (maxCharConst.equals(newValue)) { + return makeHiveUnicodeString(Interpretation.VARCHAR, newValue.getValue()); + } else { + return null; + } + } + } + + return constantValue; + } + + private BigDecimal toBigDecimal(String val) { + if (!NumberUtils.isNumber(val)) { + throw new NumberFormatException("The given string is not a valid number: " + val); + } + return new BigDecimal(val.replaceAll("[dDfFlL]$", "")); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createStringConstantExpr(String value) { + return rexBuilder.makeCharLiteral( + makeHiveUnicodeString(Interpretation.STRING, value)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createDateConstantExpr(String value) { + Date d = Date.valueOf(value); + return rexBuilder.makeDateLiteral( + DateString.fromDaysSinceEpoch(d.toEpochDay())); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createTimestampConstantExpr(String value) { + Timestamp t = Timestamp.valueOf(value); + return (RexLiteral) rexBuilder.makeLiteral( + TimestampString.fromMillisSinceEpoch(t.toEpochMilli()).withNanos(t.getNanos()), + rexBuilder.getTypeFactory().createSqlType( + SqlTypeName.TIMESTAMP, + rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP)), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createTimestampLocalTimeZoneConstantExpr(String value, ZoneId zoneId) { + TimestampTZ t = TimestampTZUtil.parse(value); + + final TimestampString tsLocalTZString; + if (value == null) { + tsLocalTZString = null; + } else { + Instant i = t.getZonedDateTime().toInstant(); + tsLocalTZString = TimestampString + .fromMillisSinceEpoch(i.toEpochMilli()) + .withNanos(i.getNano()); + } + return rexBuilder.makeTimestampWithLocalTimeZoneLiteral( + tsLocalTZString, + rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalYearMonthConstantExpr(String value) { + BigDecimal totalMonths = BigDecimal.valueOf(HiveIntervalYearMonth.valueOf(value).getTotalMonths()); + return rexBuilder.makeIntervalLiteral(totalMonths, + new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalDayTimeConstantExpr(String value) { + HiveIntervalDayTime v = HiveIntervalDayTime.valueOf(value); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = 
BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalYearConstantExpr(String value) { + HiveIntervalYearMonth v = new HiveIntervalYearMonth(Integer.parseInt(value), 0); + BigDecimal totalMonths = BigDecimal.valueOf(v.getTotalMonths()); + return rexBuilder.makeIntervalLiteral(totalMonths, + new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalMonthConstantExpr(String value) { + BigDecimal totalMonths = BigDecimal.valueOf(Integer.parseInt(value)); + return rexBuilder.makeIntervalLiteral(totalMonths, + new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalDayConstantExpr(String value) { + HiveIntervalDayTime v = new HiveIntervalDayTime(Integer.parseInt(value), 0, 0, 0, 0); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalHourConstantExpr(String value) { + HiveIntervalDayTime v = new HiveIntervalDayTime(0, Integer.parseInt(value), 0, 0, 0); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalMinuteConstantExpr(String value) { + HiveIntervalDayTime v = new HiveIntervalDayTime(0, 0, Integer.parseInt(value), 0, 0); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalSecondConstantExpr(String value) { + BigDecimal bd = new BigDecimal(value); + BigDecimal bdSeconds = new BigDecimal(bd.toBigInteger()); + BigDecimal bdNanos = bd.subtract(bdSeconds); + HiveIntervalDayTime v = new HiveIntervalDayTime(0, 0, 0, bdSeconds.intValueExact(), + bdNanos.multiply(NANOS_PER_SEC_BD).intValue()); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createStructExpr(TypeInfo typeInfo, List operands) + throws CalciteSemanticException { + assert typeInfo instanceof StructTypeInfo; + return rexBuilder.makeCall( + TypeConverter.convert(typeInfo, rexBuilder.getTypeFactory()), + SqlStdOperatorTable.ROW, + operands); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode 
createConstantExpr(TypeInfo typeInfo, Object constantValue) + throws CalciteSemanticException { + if (typeInfo instanceof StructTypeInfo) { + List typeList = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos(); + List objectList = (List) constantValue; + List operands = new ArrayList<>(); + for (int i = 0; i < typeList.size(); i++) { + operands.add( + rexBuilder.makeLiteral( + objectList.get(i), + TypeConverter.convert(typeList.get(i), rexBuilder.getTypeFactory()), + false)); + } + return rexBuilder.makeCall( + TypeConverter.convert(typeInfo, rexBuilder.getTypeFactory()), + SqlStdOperatorTable.ROW, + operands); + } + return rexBuilder.makeLiteral(constantValue, + TypeConverter.convert(typeInfo, rexBuilder.getTypeFactory()), false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createNestedColumnRefExpr( + TypeInfo typeInfo, RexNode expr, String fieldName, Boolean isList) throws CalciteSemanticException { + if (expr.getType().isStruct()) { + // regular case of accessing nested field in a column + return rexBuilder.makeFieldAccess(expr, fieldName, true); + } else { + // This may happen for schema-less tables, where columns are dynamically + // supplied by serdes. + throw new CalciteSemanticException("Unexpected rexnode : " + + expr.getClass().getCanonicalName(), UnsupportedFeature.Schema_less_table); + } + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createFuncCallExpr(TypeInfo returnType, GenericUDF genericUDF, + List inputs) throws SemanticException { + final String funcText = genericUDF.getClass().getAnnotation(Description.class).name(); + final FunctionInfo functionInfo = functionHelper.getFunctionInfo(funcText); + return functionHelper.getExpression( + funcText, functionInfo, inputs, + TypeConverter.convert(returnType, rexBuilder.getTypeFactory())); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createFuncCallExpr(GenericUDF genericUDF, String funcText, + List inputs) throws SemanticException { + // 1) Function resolution + final FunctionInfo functionInfo = functionHelper.getFunctionInfo(funcText); + // 2) Compute return type + RelDataType returnType; + if (genericUDF instanceof SettableUDF) { + returnType = TypeConverter.convert( + ((SettableUDF) genericUDF).getTypeInfo(), rexBuilder.getTypeFactory()); + } else { + returnType = functionHelper.getReturnType(functionInfo, inputs); + } + // 3) Convert inputs (if necessary) + List newInputs = functionHelper.convertInputs( + functionInfo, inputs, returnType); + // 4) Return Calcite function + return functionHelper.getExpression( + funcText, functionInfo, newInputs, returnType); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createExprsListExpr() { + return new HiveRexExprList(); + } + + /** + * {@inheritDoc} + */ + @Override + protected void addExprToExprsList(RexNode columnList, RexNode expr) { + HiveRexExprList l = (HiveRexExprList) columnList; + l.addExpression(expr); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isConstantExpr(Object o) { + return o instanceof RexLiteral; + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isFuncCallExpr(Object o) { + return o instanceof RexCall; + } + + /** + * {@inheritDoc} + */ + @Override + protected Object getConstantValue(RexNode expr) { + if (expr.getType().getSqlTypeName() == SqlTypeName.ROW) { + List res = new ArrayList<>(); + for (RexNode node : ((RexCall) expr).getOperands()) { + res.add(((RexLiteral) node).getValue4()); + } + return res; + } + return ((RexLiteral) 
expr).getValue4(); + } + + /** + * {@inheritDoc} + */ + @Override + protected String getConstantValueAsString(RexNode expr) { + return ((RexLiteral) expr).getValueAs(String.class); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isColumnRefExpr(Object o) { + return o instanceof RexNode && RexUtil.isReferenceOrAccess((RexNode) o, true); + } + + /** + * {@inheritDoc} + */ + @Override + protected String getColumnName(RexNode expr, RowResolver rowResolver) { + int index = ((RexInputRef) expr).getIndex(); + return rowResolver.getColumnInfos().get(index).getInternalName(); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isExprsListExpr(Object o) { + return o instanceof HiveRexExprList; + } + + /** + * {@inheritDoc} + */ + @Override + protected List getExprChildren(RexNode expr) { + if (expr instanceof RexCall) { + return ((RexCall) expr).getOperands(); + } else if (expr instanceof HiveRexExprList) { + return ((HiveRexExprList) expr).getExpressions(); + } + return new ArrayList<>(); + } + + /** + * {@inheritDoc} + */ + @Override + protected TypeInfo getTypeInfo(RexNode expr) { + return expr.isA(SqlKind.LITERAL) ? + TypeConverter.convertLiteralType((RexLiteral) expr) : + TypeConverter.convert(expr.getType()); + } + + /** + * {@inheritDoc} + */ + @Override + protected List getStructTypeInfoList(RexNode expr) { + StructTypeInfo structTypeInfo = (StructTypeInfo) TypeConverter.convert(expr.getType()); + return structTypeInfo.getAllStructFieldTypeInfos(); + } + + /** + * {@inheritDoc} + */ + @Override + protected List getStructNameList(RexNode expr) { + StructTypeInfo structTypeInfo = (StructTypeInfo) TypeConverter.convert(expr.getType()); + return structTypeInfo.getAllStructFieldNames(); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isORFuncCallExpr(RexNode expr) { + return expr.isA(SqlKind.OR); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isANDFuncCallExpr(RexNode expr) { + return expr.isA(SqlKind.AND); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isPOSITIVEFuncCallExpr(RexNode expr) { + return expr.isA(SqlKind.PLUS_PREFIX); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isNEGATIVEFuncCallExpr(RexNode expr) { + return expr.isA(SqlKind.MINUS_PREFIX); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode setTypeInfo(RexNode expr, TypeInfo type) throws CalciteSemanticException { + RelDataType t = TypeConverter.convert(type, rexBuilder.getTypeFactory()); + if (expr instanceof RexCall) { + RexCall call = (RexCall) expr; + return rexBuilder.makeCall(t, + call.getOperator(), call.getOperands()); + } else if (expr instanceof RexInputRef) { + RexInputRef inputRef = (RexInputRef) expr; + return rexBuilder.makeInputRef(t, inputRef.getIndex()); + } else if (expr instanceof RexLiteral) { + RexLiteral literal = (RexLiteral) expr; + return rexBuilder.makeLiteral(RexLiteral.value(literal), t, false); + } + throw new RuntimeException("Unsupported expression type: " + expr.getClass().getCanonicalName()); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean canConvertCASEIntoCOALESCEFuncCallExpr(GenericUDF genericUDF, List inputs) { + if (genericUDF instanceof GenericUDFWhen && inputs.size() == 3 && + inputs.get(1) instanceof RexLiteral && + inputs.get(2) instanceof RexLiteral) { + RexLiteral constThen = (RexLiteral) inputs.get(1); + RexLiteral constElse = (RexLiteral) inputs.get(2); + Object thenVal = constThen.getValue(); + Object elseVal = constElse.getValue(); + if 
(thenVal instanceof Boolean && elseVal instanceof Boolean) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode foldExpr(RexNode expr) { + return functionHelper.foldExpression(expr); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isSTRUCTFuncCallExpr(RexNode expr) { + return expr instanceof RexCall && + ((RexCall) expr).getOperator() == SqlStdOperatorTable.ROW; + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isConstantStruct(RexNode expr) { + return expr.getType().getSqlTypeName() == SqlTypeName.ROW && + HiveCalciteUtil.isConstant(expr); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createSubqueryExpr(TypeCheckCtx ctx, ASTNode expr, SubqueryType subqueryType, + Object[] inputs) throws SemanticException { + // subqueryToRelNode might be null if subquery expression anywhere other than + // as expected in filter (where/having). We should throw an appropriate error + // message + Map subqueryToRelNode = ctx.getSubqueryToRelNode(); + if (subqueryToRelNode == null) { + throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + " Currently SubQuery expressions are only allowed as " + + "Where and Having Clause predicates")); + } + + ASTNode subqueryOp = (ASTNode) expr.getChild(0); + RelNode subqueryRel = subqueryToRelNode.get(expr); + // For now because subquery is only supported in filter + // we will create subquery expression of boolean type + switch (subqueryType) { + case EXISTS: { + return RexSubQuery.exists(subqueryRel); + } + case IN: { + assert (inputs[2] != null); + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. + */ + if(subqueryRel.getRowType().getFieldCount() > 1) { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "SubQuery can contain only 1 item in Select List.")); + } + //create RexNode for LHS + RexNode lhs = (RexNode) inputs[2]; + //create RexSubQuery node + return RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); + } + case SCALAR: { + // only single subquery expr is supported + if (subqueryRel.getRowType().getFieldCount() != 1) { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "More than one column expression in subquery")); + } + if(subqueryRel.getRowType().getFieldCount() > 1) { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "SubQuery can contain only 1 item in Select List.")); + } + //create RexSubQuery node + return RexSubQuery.scalar(subqueryRel); + } + case SOME: + case ALL: { + assert (inputs[2] != null); + //create RexNode for LHS + RexNode lhs = (RexNode) inputs[2]; + return convertSubquerySomeAll(subqueryRel.getCluster(), + (ASTNode) subqueryOp.getChild(1), subqueryType, subqueryRel, lhs); + } + default: + return null; + } + } + + private static void throwInvalidSubqueryError(final ASTNode comparisonOp) throws SemanticException { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Invalid operator:" + comparisonOp.toString())); + } + + public static RexNode convertSubquerySomeAll(final RelOptCluster cluster, final ASTNode comparisonOp, + final SubqueryType subqueryType, final RelNode subqueryRel, final RexNode rexNodeLhs) + throws SemanticException { + SqlQuantifyOperator quantifyOperator = null; + switch (comparisonOp.getType()) { + case HiveParser.EQUAL: + if(subqueryType == 
SubqueryType.ALL) { + throwInvalidSubqueryError(comparisonOp); + } + quantifyOperator = SqlStdOperatorTable.SOME_EQ; + break; + case HiveParser.LESSTHAN: + quantifyOperator = SqlStdOperatorTable.SOME_LT; + break; + case HiveParser.LESSTHANOREQUALTO: + quantifyOperator = SqlStdOperatorTable.SOME_LE; + break; + case HiveParser.GREATERTHAN: + quantifyOperator = SqlStdOperatorTable.SOME_GT; + break; + case HiveParser.GREATERTHANOREQUALTO: + quantifyOperator = SqlStdOperatorTable.SOME_GE; + break; + case HiveParser.NOTEQUAL: + if(subqueryType == SubqueryType.SOME) { + throwInvalidSubqueryError(comparisonOp); + } + quantifyOperator = SqlStdOperatorTable.SOME_NE; + break; + default: + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Invalid operator:" + comparisonOp.toString())); + } + if(subqueryType == SubqueryType.ALL) { + quantifyOperator = SqlStdOperatorTable.some(quantifyOperator.comparisonKind.negateNullSafe()); + } + + RexNode someQuery = getSomeSubquery(cluster, subqueryRel, rexNodeLhs, quantifyOperator); + if(subqueryType == SubqueryType.ALL) { + return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, someQuery); + } + return someQuery; + } + + private static RexNode getSomeSubquery(final RelOptCluster cluster, + final RelNode subqueryRel, final RexNode lhs, + final SqlQuantifyOperator quantifyOperator) { + if(quantifyOperator == SqlStdOperatorTable.SOME_EQ) { + return RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); + } else if (quantifyOperator == SqlStdOperatorTable.SOME_NE) { + RexSubQuery subQuery = RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); + return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, subQuery); + } else { + return RexSubQuery.some(subqueryRel, ImmutableList.of(lhs), quantifyOperator); + } + } + + public static NlsString makeHiveUnicodeString(Interpretation interpretation, String text) { + return new HiveNlsString(interpretation, text, ConversionUtil.NATIVE_UTF16_CHARSET_NAME, SqlCollation.IMPLICIT); + } + + public static class HiveNlsString extends NlsString { + + public enum Interpretation { + CHAR, VARCHAR, STRING; + } + + public final Interpretation interpretation; + + public HiveNlsString(Interpretation interpretation, String value, String charsetName, SqlCollation collation) { + super(value, charsetName, collation); + this.interpretation = interpretation; + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java new file mode 100644 index 0000000000..c9131ec018 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse.type; + +import java.util.Map; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +public class RexNodeTypeCheck { + + /** + * Given an AST expression and a context, it will produce a map from AST nodes + * to Calcite RexNode. + */ + public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) + throws SemanticException { + TypeCheckProcFactory factory = + new TypeCheckProcFactory<>(new RexNodeExprFactory(tcCtx.getRexBuilder())); + return factory.genExprNode(expr, tcCtx); + } + + /** + * Returns the default processor to generate Calcite RexNode from AST nodes. + */ + public static TypeCheckProcFactory.DefaultExprProcessor getExprNodeDefaultExprProcessor(RexBuilder rexBuilder) { + TypeCheckProcFactory factory = + new TypeCheckProcFactory<>(new RexNodeExprFactory(rexBuilder)); + return factory.getDefaultExprProcessor(); + } + + /** + * Given an AST join expression and a context, it will produce a map from AST nodes + * to Calcite RexNode. + */ + public static Map genExprNodeJoinCond(ASTNode expr, TypeCheckCtx tcCtx, RexBuilder rexBuilder) + throws SemanticException { + JoinCondTypeCheckProcFactory typeCheckProcFactory = + new JoinCondTypeCheckProcFactory<>(new RexNodeExprFactory(rexBuilder)); + return typeCheckProcFactory.genExprNode(expr, tcCtx); + } + + /** + * Transforms column information into the corresponding Calcite RexNode. + */ + public static RexNode toExprNode(ColumnInfo columnInfo, RowResolver rowResolver, int offset, RexBuilder rexBuilder) + throws SemanticException { + RexNodeExprFactory factory = new RexNodeExprFactory(rexBuilder); + return factory.toExpr(columnInfo, rowResolver, offset); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckCtx.java index 67875cc31f..d4ef3faec0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckCtx.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse.type; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexBuilder; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.RowResolver; @@ -93,6 +94,8 @@ private final boolean allowSubQueryExpr; + private RexBuilder rexBuilder; + /** * Constructor. 
* @@ -103,11 +106,27 @@ public TypeCheckCtx(RowResolver inputRR) { this(inputRR, true, false); } + public TypeCheckCtx(RowResolver inputRR, RexBuilder rexBuilder) { + this(inputRR, rexBuilder, true, false); + } + public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean foldExpr) { this(inputRR, useCaching, foldExpr, false, true, true, true, true, true, true, true); } + public TypeCheckCtx(RowResolver inputRR, RexBuilder rexBuilder, boolean useCaching, boolean foldExpr) { + this(inputRR, rexBuilder, useCaching, foldExpr, false, true, true, true, true, true, true, true); + } + public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean foldExpr, + boolean allowStatefulFunctions, boolean allowDistinctFunctions, boolean allowGBExprElimination, + boolean allowAllColRef, boolean allowFunctionStar, boolean allowWindowing, + boolean allowIndexExpr, boolean allowSubQueryExpr) { + this(inputRR, null, useCaching, foldExpr, allowStatefulFunctions, allowDistinctFunctions, allowGBExprElimination, + allowAllColRef, allowFunctionStar, allowWindowing, allowIndexExpr, allowSubQueryExpr); + } + + public TypeCheckCtx(RowResolver inputRR, RexBuilder rexBuilder, boolean useCaching, boolean foldExpr, boolean allowStatefulFunctions, boolean allowDistinctFunctions, boolean allowGBExprElimination, boolean allowAllColRef, boolean allowFunctionStar, boolean allowWindowing, boolean allowIndexExpr, boolean allowSubQueryExpr) { @@ -125,6 +144,7 @@ public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean foldExpr, this.allowSubQueryExpr = allowSubQueryExpr; this.outerRR = null; this.subqueryToRelNode = null; + this.rexBuilder = rexBuilder; } /** @@ -271,4 +291,8 @@ public boolean isFoldExpr() { public boolean isCBOExecuted() { return foldExpr; } + + public RexBuilder getRexBuilder() { + return rexBuilder; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java index 4615fc5729..c31ef92cb7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java @@ -18,8 +18,11 @@ package org.apache.hadoop.hive.ql.parse.type; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ListMultimap; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -28,7 +31,9 @@ import java.util.Stack; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionInfo; @@ -54,9 +59,9 @@ import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.SubqueryType; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -610,8 +615,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, boolean isTableAlias = input.hasTableAlias(tableOrCol); ColumnInfo colInfo = null; + RowResolver usedRR = null; + int offset = 0; try { colInfo = input.get(null, tableOrCol); + usedRR = input; } catch (SemanticException semanticException) { if (!isTableAlias || parent == null || parent.getType() != HiveParser.DOT) { throw semanticException; @@ -622,6 +630,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, RowResolver outerRR = ctx.getOuterRR(); isTableAlias = outerRR.hasTableAlias(tableOrCol); colInfo = outerRR.get(null, tableOrCol); + usedRR = outerRR; + offset = input.getColumnInfos().size(); } if (isTableAlias) { @@ -631,7 +641,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } // It's a column. - return exprFactory.toExpr(colInfo); + return exprFactory.toExpr(colInfo, usedRR, offset); } else { // It's a table alias. // We will process that later in DOT. @@ -665,7 +675,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } else { // It's a column. - return exprFactory.toExpr(colInfo); + return exprFactory.toExpr(colInfo, usedRR, offset); } } } @@ -714,7 +724,7 @@ protected boolean isRedundantConversionFunction(ASTNode expr, * @throws UDFArgumentException */ public T getFuncExprNodeDescWithUdfData(String udfName, TypeInfo typeInfo, - T... children) throws UDFArgumentException { + T... children) throws SemanticException { FunctionInfo fi; try { @@ -742,10 +752,10 @@ public T getFuncExprNodeDescWithUdfData(String udfName, TypeInfo typeInfo, List childrenList = new ArrayList<>(children.length); childrenList.addAll(Arrays.asList(children)); - return exprFactory.createFuncCallExpr(genericUDF, null, childrenList); + return exprFactory.createFuncCallExpr(genericUDF, udfName, childrenList); } - public T getFuncExprNodeDesc(String udfName, T... children) throws UDFArgumentException { + public T getFuncExprNodeDesc(String udfName, T... children) throws SemanticException { return getFuncExprNodeDescWithUdfData(udfName, null, children); } @@ -768,6 +778,29 @@ public T createConversionCast(T column, PrimitiveTypeInfo tableFieldTypeInfo) protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi, List children, GenericUDF genericUDF) throws SemanticException { + // Check if a bigint is implicitly cast to a double as part of a comparison + // Perform the check here instead of in GenericUDFBaseCompare to guarantee it is only run once per operator + if (genericUDF instanceof GenericUDFBaseCompare && children.size() == 2) { + TypeInfo oiTypeInfo0 = exprFactory.getTypeInfo(children.get(0)); + TypeInfo oiTypeInfo1 = exprFactory.getTypeInfo(children.get(1)); + + SessionState ss = SessionState.get(); + Configuration conf = (ss != null) ? 
ss.getConf() : new Configuration(); + + LogHelper console = new LogHelper(LOG); + + // For now, if a bigint is going to be cast to a double throw an error or warning + if ((oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || + (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.stringTypeInfo))) { + String error = StrictChecks.checkTypeSafety(conf); + if (error != null) throw new UDFArgumentException(error); + console.printError("WARNING: Comparing a bigint and a string may result in a loss of precision."); + } else if ((oiTypeInfo0.equals(TypeInfoFactory.doubleTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || + (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.doubleTypeInfo))) { + console.printError("WARNING: Comparing a bigint and a double may result in a loss of precision."); + } + } + // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't // supported if (fi.getGenericUDTF() != null) { @@ -801,17 +834,17 @@ protected void insertCast(String funcText, List children) throws SemanticExce } } - protected T getXpathOrFuncExprNodeDesc(ASTNode expr, + protected T getXpathOrFuncExprNodeDesc(ASTNode node, boolean isFunction, List children, TypeCheckCtx ctx) - throws SemanticException, UDFArgumentException { + throws SemanticException { // return the child directly if the conversion is redundant. - if (isRedundantConversionFunction(expr, isFunction, children)) { + if (isRedundantConversionFunction(node, isFunction, children)) { assert (children.size() == 1); assert (children.get(0) != null); return children.get(0); } - String funcText = getFunctionText(expr, isFunction); - T desc; + String funcText = getFunctionText(node, isFunction); + T expr; if (funcText.equals(".")) { // "." 
: FIELD Expression @@ -821,7 +854,7 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, T object = children.get(0); // Calculate result TypeInfo - String fieldNameString = exprFactory.getConstantValue(children.get(1)).toString(); + String fieldNameString = exprFactory.getConstantValueAsString(children.get(1)); TypeInfo objectTypeInfo = exprFactory.getTypeInfo(object); // Allow accessing a field of list element structs directly from a list @@ -831,19 +864,19 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, } if (objectTypeInfo.getCategory() != Category.STRUCT) { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_DOT.getMsg(), expr)); + ErrorMsg.INVALID_DOT.getMsg(), node)); } TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString); if (isList) { t = TypeInfoFactory.getListTypeInfo(t); } - desc = exprFactory.createNestedColumnRefExpr(t, children.get(0), fieldNameString, isList); + expr = exprFactory.createNestedColumnRefExpr(t, children.get(0), fieldNameString, isList); } else if (funcText.equals("[")) { // "[]" : LSQUARE/INDEX Expression if (!ctx.getallowIndexExpr()) { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_FUNCTION.getMsg(), expr)); + ErrorMsg.INVALID_FUNCTION.getMsg(), node)); } assert (children.size() == 2); @@ -856,24 +889,24 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, if (!TypeInfoUtils.implicitConvertible(exprFactory.getTypeInfo(children.get(1)), TypeInfoFactory.intTypeInfo)) { throw new SemanticException(SemanticAnalyzer.generateErrorMessage( - expr, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg())); + node, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg())); } // Calculate TypeInfo TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo(); - desc = exprFactory.createFuncCallExpr(t, FunctionRegistry.getGenericUDFForIndex(), children); + expr = exprFactory.createFuncCallExpr(t, FunctionRegistry.getGenericUDFForIndex(), children); } else if (myt.getCategory() == Category.MAP) { if (!TypeInfoUtils.implicitConvertible(exprFactory.getTypeInfo(children.get(1)), ((MapTypeInfo) myt).getMapKeyTypeInfo())) { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(), expr)); + ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(), node)); } // Calculate TypeInfo TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo(); - desc = exprFactory.createFuncCallExpr(t, FunctionRegistry.getGenericUDFForIndex(), children); + expr = exprFactory.createFuncCallExpr(t, FunctionRegistry.getGenericUDFForIndex(), children); } else { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.NON_COLLECTION_TYPE.getMsg(), expr, myt.getTypeName())); + ErrorMsg.NON_COLLECTION_TYPE.getMsg(), node, myt.getTypeName())); } } else { // other operators or functions @@ -882,10 +915,10 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, if (fi == null) { if (isFunction) { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_FUNCTION.getMsg(), (ASTNode) expr.getChild(0))); + ErrorMsg.INVALID_FUNCTION.getMsg(), (ASTNode) node.getChild(0))); } else { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_FUNCTION.getMsg(), expr)); + ErrorMsg.INVALID_FUNCTION.getMsg(), node)); } } @@ -894,12 +927,12 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, if (!fi.isNative()) { ctx.getUnparseTranslator().addIdentifierTranslation( - (ASTNode) expr.getChild(0)); + (ASTNode) node.getChild(0)); } // Handle type casts that may contain type parameters if (isFunction) { - ASTNode funcNameNode = 
(ASTNode) expr.getChild(0); + ASTNode funcNameNode = (ASTNode) node.getChild(0); switch (funcNameNode.getType()) { case HiveParser.TOK_CHAR: // Add type params @@ -941,25 +974,25 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, insertCast(funcText, children); - validateUDF(expr, isFunction, ctx, fi, children, genericUDF); + validateUDF(node, isFunction, ctx, fi, children, genericUDF); // Try to infer the type of the constant only if there are two // nodes, one of them is column and the other is numeric const if (genericUDF instanceof GenericUDFBaseCompare && children.size() == 2 - && ((children.get(0) instanceof ExprNodeConstantDesc - && children.get(1) instanceof ExprNodeColumnDesc) - || (children.get(0) instanceof ExprNodeColumnDesc - && children.get(1) instanceof ExprNodeConstantDesc))) { + && ((exprFactory.isConstantExpr(children.get(0)) + && exprFactory.isColumnRefExpr(children.get(1))) + || (exprFactory.isColumnRefExpr(children.get(0)) + && exprFactory.isConstantExpr(children.get(1))))) { - int constIdx = children.get(0) instanceof ExprNodeConstantDesc ? 0 : 1; + int constIdx = exprFactory.isConstantExpr(children.get(0)) ? 0 : 1; T constChild = children.get(constIdx); T columnChild = children.get(1 - constIdx); - final PrimitiveTypeInfo colTypeInfo = - TypeInfoFactory.getPrimitiveTypeInfo(exprFactory.getTypeInfo(columnChild).getTypeName().toLowerCase()); - T newChild = interpretNodeAs(colTypeInfo, constChild); + final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo( + exprFactory.getTypeInfo(columnChild).getTypeName().toLowerCase()); + T newChild = interpretNodeAsConstant(colTypeInfo, constChild); if (newChild == null) { // non-interpretable as target type... // TODO: all comparisons with null should result in null @@ -972,55 +1005,42 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, } } if (genericUDF instanceof GenericUDFIn) { - - T columnDesc = children.get(0); - List outputOpList = children.subList(1, children.size()); - List inOperands = new ArrayList<>(outputOpList); - outputOpList.clear(); - - boolean hasNullValue = false; - for (T oldChild : inOperands) { - if (oldChild == null) { - hasNullValue = true; - continue; - } - T newChild = interpretNodeAsStruct(columnDesc, oldChild); - if (newChild == null) { - hasNullValue = true; - continue; - } - outputOpList.add(newChild); - } - - if (hasNullValue) { - T nullConst = exprFactory.createConstantExpr(exprFactory.getTypeInfo(columnDesc), null); - if (outputOpList.size() == 0) { - // we have found only null values...remove the IN ; it will be null all the time. 
- return nullConst; + ListMultimap expressions = ArrayListMultimap.create(); + for (int i = 1; i < children.size(); i++) { + T columnDesc = children.get(0); + T valueDesc = interpretNodeAsConstantStruct(columnDesc, children.get(i)); + if (valueDesc == null) { + TypeInfo targetType = exprFactory.getTypeInfo(columnDesc); + if (!expressions.containsKey(targetType)) { + expressions.put(targetType, columnDesc); + } + T nullConst = exprFactory.createConstantExpr(targetType, null); + expressions.put(targetType, nullConst); + } else { + TypeInfo targetType = exprFactory.getTypeInfo(valueDesc); + if (!expressions.containsKey(targetType)) { + expressions.put(targetType, columnDesc); + } + expressions.put(targetType, valueDesc); } - outputOpList.add(nullConst); } - if (!ctx.isCBOExecuted()) { - - HiveConf conf; - try { - conf = Hive.get().getConf(); - } catch (HiveException e) { - throw new SemanticException(e); - } - if (children.size() <= HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES)) { - List orOperands = exprFactory.rewriteINIntoORFuncCallExpr(children); - if (orOperands != null) { - if (orOperands.size() == 1) { - orOperands.add(exprFactory.createBooleanConstantExpr(Boolean.FALSE.toString())); - } - funcText = "or"; - genericUDF = new GenericUDFOPOr(); - children.clear(); - children.addAll(orOperands); - } + children.clear(); + List newExprs = new ArrayList<>(); + int numEntries = expressions.keySet().size(); + if (numEntries == 1) { + children.addAll(expressions.asMap().values().iterator().next()); + funcText = "in"; + genericUDF = new GenericUDFIn(); + } else { + for (Collection c : expressions.asMap().values()) { + newExprs.add( + exprFactory.createFuncCallExpr( + new GenericUDFIn(), "in", (List) c)); } + children.addAll(newExprs); + funcText = "or"; + genericUDF = new GenericUDFOPOr(); } } if (genericUDF instanceof GenericUDFOPOr) { @@ -1036,7 +1056,7 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, childrenList.add(child); } } - desc = exprFactory.createFuncCallExpr(genericUDF, funcText, childrenList); + expr = exprFactory.createFuncCallExpr(genericUDF, funcText, childrenList); } else if (genericUDF instanceof GenericUDFOPAnd) { // flatten AND List childrenList = new ArrayList<>(children.size()); @@ -1050,111 +1070,130 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, childrenList.add(child); } } - desc = exprFactory.createFuncCallExpr(genericUDF, funcText, childrenList); + expr = exprFactory.createFuncCallExpr(genericUDF, funcText, childrenList); } else if (ctx.isFoldExpr() && exprFactory.canConvertCASEIntoCOALESCEFuncCallExpr(genericUDF, children)) { // Rewrite CASE into COALESCE - desc = exprFactory.createFuncCallExpr(new GenericUDFCoalesce(), null, + expr = exprFactory.createFuncCallExpr(new GenericUDFCoalesce(), "coalesce", Lists.newArrayList(children.get(0), exprFactory.createBooleanConstantExpr(Boolean.FALSE.toString()))); if (Boolean.FALSE.equals(exprFactory.getConstantValue(children.get(1)))) { - desc = exprFactory.createFuncCallExpr(new GenericUDFOPNot(), null, Lists.newArrayList(desc)); + expr = exprFactory.createFuncCallExpr(new GenericUDFOPNot(), "not", Lists.newArrayList(expr)); } } else { - desc = exprFactory.createFuncCallExpr(genericUDF, funcText, children); + expr = exprFactory.createFuncCallExpr(genericUDF, funcText, children); } // If the function is deterministic and the children are constants, // we try to fold the expression to remove e.g. 
cast on constant - if (ctx.isFoldExpr() && exprFactory.isFuncCallExpr(desc) && + if (ctx.isFoldExpr() && exprFactory.isFuncCallExpr(expr) && FunctionRegistry.isConsistentWithinQuery(genericUDF) && exprFactory.isAllConstants(children)) { - T constantExpr = exprFactory.foldExpr(desc); + T constantExpr = exprFactory.foldExpr(expr); if (constantExpr != null) { - desc = constantExpr; + expr = constantExpr; } } } - // UDFOPPositive is a no-op. - // However, we still create it, and then remove it here, to make sure we - // only allow - // "+" for numeric types. - if (exprFactory.isPOSITIVEFuncCallExpr(desc)) { - assert (exprFactory.getExprChildren(desc).size() == 1); - desc = exprFactory.getExprChildren(desc).get(0); + + if (exprFactory.isPOSITIVEFuncCallExpr(expr)) { + // UDFOPPositive is a no-op. + assert (exprFactory.getExprChildren(expr).size() == 1); + expr = exprFactory.getExprChildren(expr).get(0); + } else if (exprFactory.isNEGATIVEFuncCallExpr(expr)) { + // UDFOPNegative should always be folded. + assert (exprFactory.getExprChildren(expr).size() == 1); + T input = exprFactory.getExprChildren(expr).get(0); + if (exprFactory.isConstantExpr(input)) { + T constantExpr = exprFactory.foldExpr(expr); + if (constantExpr != null) { + expr = constantExpr; + } + } } - assert (desc != null); - return desc; + assert (expr != null); + return expr; } /** * Interprets the given value as columnDesc if possible */ - private T interpretNodeAsStruct(T columnDesc, T valueDesc) + private T interpretNodeAsConstantStruct(T columnDesc, T valueDesc) throws SemanticException { if (exprFactory.isColumnRefExpr(columnDesc)) { - final PrimitiveTypeInfo typeInfo = - TypeInfoFactory.getPrimitiveTypeInfo(exprFactory.getTypeInfo(columnDesc).getTypeName().toLowerCase()); - return interpretNodeAs(typeInfo, valueDesc); - } - if (exprFactory.isSTRUCTFuncCallExpr(columnDesc) && exprFactory.isConstantStruct(valueDesc)) { - List columnChilds = exprFactory.getExprChildren(columnDesc); - ExprNodeConstantDesc valueConstDesc = (ExprNodeConstantDesc) valueDesc; - StructTypeInfo structTypeInfo = (StructTypeInfo) valueConstDesc.getTypeInfo(); - ArrayList structFieldInfos = structTypeInfo.getAllStructFieldTypeInfos(); - ArrayList newStructFieldInfos = new ArrayList<>(); - - if (columnChilds.size() != structFieldInfos.size()) { - throw new SemanticException(ErrorMsg.INCOMPATIBLE_STRUCT.getMsg(columnChilds + " and " + structFieldInfos)); - } - List oldValues = (List) valueConstDesc.getValue(); - List newValues = new ArrayList<>(); - for (int i = 0; i < columnChilds.size(); i++) { - newStructFieldInfos.add(exprFactory.getTypeInfo(columnChilds.get(i))); - Object newValue = exprFactory.interpretConstantAsPrimitive( - (PrimitiveTypeInfo) exprFactory.getTypeInfo(columnChilds.get(i)), - oldValues.get(i), - (PrimitiveTypeInfo) structFieldInfos.get(i)); - newValues.add(newValue); - } - StructTypeInfo sti = new StructTypeInfo(); - sti.setAllStructFieldTypeInfos(newStructFieldInfos); - sti.setAllStructFieldNames(structTypeInfo.getAllStructFieldNames()); - return exprFactory.createConstantExpr(sti, newValues); + final PrimitiveTypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo( + exprFactory.getTypeInfo(columnDesc).getTypeName().toLowerCase()); + return interpretNodeAsConstant(typeInfo, valueDesc); + } + boolean columnStruct = exprFactory.isSTRUCTFuncCallExpr(columnDesc); + if (columnStruct) { + boolean constantValuesStruct = exprFactory.isConstantStruct(valueDesc); + boolean valuesStruct = exprFactory.isSTRUCTFuncCallExpr(valueDesc); + if 
(constantValuesStruct || valuesStruct) { + List columnChilds = exprFactory.getExprChildren(columnDesc); + List structFieldInfos = exprFactory.getStructTypeInfoList(valueDesc); + List structFieldNames = exprFactory.getStructNameList(valueDesc); + + if (columnChilds.size() != structFieldInfos.size()) { + throw new SemanticException(ErrorMsg.INCOMPATIBLE_STRUCT.getMsg(columnChilds + " and " + structFieldInfos)); + } - } - if (exprFactory.isSTRUCTFuncCallExpr(columnDesc) && exprFactory.isSTRUCTFuncCallExpr(valueDesc)) { - List columnChilds = exprFactory.getExprChildren(columnDesc); - List valueChilds = exprFactory.getExprChildren(valueDesc); - if (columnChilds.size() != valueChilds.size()) { - throw new SemanticException(ErrorMsg.INCOMPATIBLE_STRUCT.getMsg(columnChilds + " and " + valueChilds)); - } - List oldValueChilds = new ArrayList<>(valueChilds); - valueChilds.clear(); - for (int i = 0; i < oldValueChilds.size(); i++) { - T newValue = interpretNodeAsStruct(columnChilds.get(i), oldValueChilds.get(i)); - valueChilds.add(newValue); + if (constantValuesStruct) { + List literals = (List) exprFactory.getConstantValue(valueDesc); + List constantExpressions = new ArrayList<>(); + List newStructFieldInfos = new ArrayList<>(); + for (int i = 0; i < columnChilds.size(); i++) { + final PrimitiveTypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo( + exprFactory.getTypeInfo(columnChilds.get(i)).getTypeName().toLowerCase()); + T constantExpression = interpretNodeAsConstant(typeInfo, + exprFactory.createConstantExpr(structFieldInfos.get(i), literals.get(i))); + if (constantExpression == null) { + constantExpression = exprFactory.createConstantExpr(typeInfo, null); + } + constantExpressions.add(constantExpression); + newStructFieldInfos.add(exprFactory.getTypeInfo(constantExpression)); + } + StructTypeInfo structTypeInfo = new StructTypeInfo(); + structTypeInfo.setAllStructFieldNames(new ArrayList<>(structFieldNames)); + structTypeInfo.setAllStructFieldTypeInfos(new ArrayList<>(newStructFieldInfos)); + return exprFactory.createStructExpr(structTypeInfo, constantExpressions); + } else { // valuesStruct + List valueChilds = exprFactory.getExprChildren(valueDesc); + List newValueChilds = new ArrayList<>(); + List newStructFieldInfos = new ArrayList<>(); + for (int i = 0; i < columnChilds.size(); i++) { + T newValue = interpretNodeAsConstantStruct(columnChilds.get(i), valueChilds.get(i)); + newValueChilds.add(newValue); + newStructFieldInfos.add(exprFactory.getTypeInfo(columnChilds.get(i))); + } + StructTypeInfo structTypeInfo = new StructTypeInfo(); + structTypeInfo.setAllStructFieldNames(new ArrayList<>(structFieldNames)); + structTypeInfo.setAllStructFieldTypeInfos(new ArrayList<>(newStructFieldInfos)); + return exprFactory.createStructExpr(structTypeInfo, newValueChilds); + } } } return valueDesc; } @VisibleForTesting - protected T interpretNodeAs(PrimitiveTypeInfo colTypeInfo, T constChild) { + protected T interpretNodeAsConstant(PrimitiveTypeInfo targetType, T constChild) throws SemanticException { if (exprFactory.isConstantExpr(constChild)) { // Try to narrow type of constant Object constVal = exprFactory.getConstantValue(constChild); if (constVal == null) { // adjust type of null - return exprFactory.createConstantExpr(colTypeInfo, null); + return exprFactory.createConstantExpr(targetType, null); } + PrimitiveTypeInfo sourceType = + (PrimitiveTypeInfo) exprFactory.getTypeInfo(constChild); Object newConst = exprFactory.interpretConstantAsPrimitive( - colTypeInfo, constVal, (PrimitiveTypeInfo) 
exprFactory.getTypeInfo(constChild)); + targetType, constVal, sourceType); if (newConst == null) { return null; } if (newConst == constVal) { return constChild; } else { - return exprFactory.createConstantExpr(exprFactory.adjustConstantType(colTypeInfo, newConst), newConst); + return exprFactory.createConstantExpr(exprFactory.adjustConstantType(targetType, newConst), newConst); } } return constChild; @@ -1179,7 +1218,7 @@ private boolean isDescendant(Node ans, Node des) { } protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, - Object... nodeOutputs) throws SemanticException { + Object... nodeOutputs) throws SemanticException { RowResolver input = ctx.getInputRR(); String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) .getText()); @@ -1188,18 +1227,23 @@ protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, T desc = (T) nodeOutputs[1]; String colName; if (exprFactory.isConstantExpr(desc)) { - colName = exprFactory.getConstantValue(desc).toString(); + colName = exprFactory.getConstantValueAsString(desc); } else if (exprFactory.isColumnRefExpr(desc)) { - colName = exprFactory.getColumnName(desc); + colName = exprFactory.getColumnName(desc, input); } else { throw new SemanticException("Unexpected ExprNode : " + nodeOutputs[1]); } + ColumnInfo colInfo = input.get(tableAlias, colName); + RowResolver usedRR = input; + int offset = 0; // Try outer Row resolver if (colInfo == null && ctx.getOuterRR() != null) { RowResolver outerRR = ctx.getOuterRR(); colInfo = outerRR.get(tableAlias, colName); + usedRR = outerRR; + offset = input.getColumnInfos().size(); } if (colInfo == null) { @@ -1207,7 +1251,7 @@ protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, ErrorMsg.INVALID_COLUMN.getMsg(), expr.getChild(1)), expr); return null; } - return exprFactory.toExpr(colInfo); + return exprFactory.toExpr(colInfo, usedRR, offset); } @Override @@ -1300,14 +1344,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (Map.Entry colMap : columns.entrySet()) { ColumnInfo colInfo = colMap.getValue(); if (!colInfo.getIsVirtualCol()) { - columnList = exprFactory.addExprToExprsList(columnList, exprFactory.toExpr(colInfo)); + exprFactory.addExprToExprsList(columnList, exprFactory.toExpr(colInfo, input, 0)); } } } else { // all columns (select *, for example) for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { - columnList = exprFactory.addExprToExprsList(columnList, exprFactory.toExpr(colInfo)); + exprFactory.addExprToExprsList(columnList, exprFactory.toExpr(colInfo, input, 0)); } } } @@ -1361,7 +1405,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, RowResolver input = ctx.getInputRR(); for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { - children.add(exprFactory.toExpr(colInfo)); + children.add(exprFactory.toExpr(colInfo, input, 0)); } } } @@ -1520,14 +1564,18 @@ private T processGByExpr(Node nd, Object procCtx) throws SemanticException { // If the current subExpression is pre-calculated, as in Group-By etc. 
ColumnInfo colInfo = input.getExpression(expr); + RowResolver usedRR = input; + int offset = 0; // try outer row resolver RowResolver outerRR = ctx.getOuterRR(); if (colInfo == null && outerRR != null) { colInfo = outerRR.getExpression(expr); + usedRR = outerRR; + offset = input.getColumnInfos().size(); } if (colInfo != null) { - desc = exprFactory.createColumnRefExpr(colInfo); + desc = exprFactory.createColumnRefExpr(colInfo, usedRR, offset); ASTNode source = input.getExpressionSource(expr); if (source != null && ctx.getUnparseTranslator() != null) { ctx.getUnparseTranslator().addCopyTranslation(expr, source); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactoryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactoryUtils.java deleted file mode 100644 index f1c9850a7d..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactoryUtils.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.parse.type; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; - -import com.google.common.collect.Lists; - -public class TypeCheckProcFactoryUtils { - - static List rewriteInToOR(List inOperands) throws SemanticException { - ExprNodeDesc columnDesc = inOperands.get(0); - - ArrayList orOperands = new ArrayList<>(); - for (int i = 1; i < inOperands.size(); i++) { - ExprNodeDesc andExpr = buildEqualsArr(columnDesc, inOperands.get(i)); - if (andExpr == null) { - return null; - } - orOperands.add(andExpr); - } - return orOperands; - } - - private static ExprNodeDesc buildEqualsArr(ExprNodeDesc columnDesc, ExprNodeDesc exprNodeDesc) - throws SemanticException { - List lNodes = asListOfNodes(columnDesc); - List rNodes = asListOfNodes(exprNodeDesc); - if (lNodes == null || rNodes == null) { - // something went wrong - return null; - } - if (lNodes.size() != rNodes.size()) { - 
throw new SemanticException(ErrorMsg.INCOMPATIBLE_STRUCT.getMsg(columnDesc + " and " + exprNodeDesc)); - } - - List ret = new ArrayList<>(); - for (int i = 0; i < lNodes.size(); i++) { - ret.add(buildEquals(lNodes.get(i), rNodes.get(i))); - } - return buildAnd(ret); - } - - private static ExprNodeGenericFuncDesc buildEquals(ExprNodeDesc columnDesc, ExprNodeDesc valueDesc) { - return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(), "=", - Lists.newArrayList(columnDesc, valueDesc)); - } - - private static ExprNodeDesc buildAnd(List values) { - if (values.size() == 1) { - return values.get(0); - } else { - return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), "and", values); - } - } - - private static List asListOfNodes(ExprNodeDesc desc) { - ExprNodeDesc valueDesc = desc; - if (ExprNodeDescUtils.isStructUDF(desc)) { - List valueChilds = ((ExprNodeGenericFuncDesc) valueDesc).getChildren(); - for (ExprNodeDesc exprNodeDesc : valueChilds) { - if (!isSafeExpression(exprNodeDesc)) { - return null; - } - } - return valueChilds; - } - if (ExprNodeDescUtils.isConstantStruct(valueDesc)) { - ExprNodeConstantDesc valueConstDesc = (ExprNodeConstantDesc) valueDesc; - List oldValues = (List) valueConstDesc.getValue(); - StructTypeInfo structTypeInfo = (StructTypeInfo) valueConstDesc.getTypeInfo(); - ArrayList structFieldInfos = structTypeInfo.getAllStructFieldTypeInfos(); - - List ret = new ArrayList<>(); - for (int i = 0; i < oldValues.size(); i++) { - ret.add(new ExprNodeConstantDesc(structFieldInfos.get(i), oldValues.get(i))); - } - return ret; - } - if (isSafeExpression(desc)) { - return Lists.newArrayList(desc); - } - - return null; - } - - private static boolean isSafeExpression(ExprNodeDesc desc) { - TypeInfo typeInfo = desc.getTypeInfo(); - if (typeInfo.getCategory() != Category.PRIMITIVE) { - return false; - } - if (isConstantOrColumn(desc)) { - return true; - } - if (desc instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc exprNodeGenericFuncDesc = (ExprNodeGenericFuncDesc) desc; - if (FunctionRegistry.isConsistentWithinQuery(exprNodeGenericFuncDesc.getGenericUDF())) { - for (ExprNodeDesc child : exprNodeGenericFuncDesc.getChildren()) { - if (!isSafeExpression(child)) { - return false; - } - } - return true; - } - } - return false; - } - - private static boolean isConstantOrColumn(ExprNodeDesc desc) { - return desc instanceof ExprNodeColumnDesc || desc instanceof ExprNodeConstantDesc; - } - -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 26a74c2af3..bd92730ae9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -664,17 +664,21 @@ public static boolean isNullConstant(ExprNodeDesc value) { public static PrimitiveTypeInfo deriveMinArgumentCast( ExprNodeDesc childExpr, TypeInfo targetType) { + return deriveMinArgumentCast(childExpr.getTypeInfo(), targetType); + } + + public static PrimitiveTypeInfo deriveMinArgumentCast( + TypeInfo childTi, TypeInfo targetType) { assert targetType instanceof PrimitiveTypeInfo : "Not a primitive type" + targetType; PrimitiveTypeInfo pti = (PrimitiveTypeInfo)targetType; // We only do the minimum cast for decimals. Other types are assumed safe; fix if needed. // We also don't do anything for non-primitive children (maybe we should assert). 
if ((pti.getPrimitiveCategory() != PrimitiveCategory.DECIMAL) - || (!(childExpr.getTypeInfo() instanceof PrimitiveTypeInfo))) { + || (!(childTi instanceof PrimitiveTypeInfo))) { return pti; } - PrimitiveTypeInfo childTi = (PrimitiveTypeInfo)childExpr.getTypeInfo(); // If the child is also decimal, no cast is needed (we hope - can target type be narrower?). - return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory(childTi); + return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory((PrimitiveTypeInfo) childTi); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index 1a46cacd4c..35ac545c89 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -232,30 +232,6 @@ public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, childrenOIs[i] = children.get(i).getWritableObjectInspector(); } - // Check if a bigint is implicitely cast to a double as part of a comparison - // Perform the check here instead of in GenericUDFBaseCompare to guarantee it is only run once per operator - if (genericUDF instanceof GenericUDFBaseCompare && children.size() == 2) { - - TypeInfo oiTypeInfo0 = children.get(0).getTypeInfo(); - TypeInfo oiTypeInfo1 = children.get(1).getTypeInfo(); - - SessionState ss = SessionState.get(); - Configuration conf = (ss != null) ? ss.getConf() : new Configuration(); - - LogHelper console = new LogHelper(LOG); - - // For now, if a bigint is going to be cast to a double throw an error or warning - if ((oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || - (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.stringTypeInfo))) { - String error = StrictChecks.checkTypeSafety(conf); - if (error != null) throw new UDFArgumentException(error); - console.printError("WARNING: Comparing a bigint and a string may result in a loss of precision."); - } else if ((oiTypeInfo0.equals(TypeInfoFactory.doubleTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || - (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.doubleTypeInfo))) { - console.printError("WARNING: Comparing a bigint and a double may result in a loss of precision."); - } - } - ObjectInspector oi = genericUDF.initializeAndFoldConstants(childrenOIs); String[] requiredJars = genericUDF.getRequiredJars(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java index b27dacb4b4..523d1a6db2 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java @@ -21,6 +21,7 @@ import java.util.Arrays; import java.util.Collection; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory.DefaultExprProcessor; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; @@ -78,63 +79,63 @@ public void init() { testSubject = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor(); } - public void testOneCase(Object constValue) { + public void testOneCase(Object constValue) throws SemanticException { 
Mockito.when(nodeDesc.getValue()).thenReturn(constValue); Mockito.when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType); - ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc); + ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAsConstant(typeInfo, nodeDesc); Assert.assertNotNull(result); Assert.assertEquals(expectedValue, result.getValue()); } - public void testNullCase(Object constValue) { + public void testNullCase(Object constValue) throws SemanticException { Mockito.when(nodeDesc.getValue()).thenReturn(constValue); Mockito.when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType); - ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc); + ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAsConstant(typeInfo, nodeDesc); Assert.assertNull(result); } @Test - public void testWithSring() { + public void testWithSring() throws SemanticException { testOneCase(maxValue.toString()); } @Test - public void testWithLSuffix() { + public void testWithLSuffix() throws SemanticException { if (intType) { testOneCase(maxValue.toString() + "L"); } } @Test - public void testWithZeroFraction() { + public void testWithZeroFraction() throws SemanticException { if (intType) { testOneCase(maxValue.toString() + ".0"); } } @Test - public void testWithFSuffix() { + public void testWithFSuffix() throws SemanticException { testOneCase(maxValue.toString() + "f"); } @Test - public void testWithDSuffix() { + public void testWithDSuffix() throws SemanticException { testOneCase(maxValue.toString() + "D"); } @Test - public void testOverflow() { + public void testOverflow() throws SemanticException { if (intType) { testNullCase(maxValue.add(BigDecimal.valueOf(1L)).toString()); } } @Test - public void testWithNonZeroFraction() { + public void testWithNonZeroFraction() throws SemanticException { if (intType) { testNullCase("100.1"); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java index 7de7065823..4dc46c52a6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java @@ -184,7 +184,6 @@ public void testBreakupAnd2() throws ParseException, CommandProcessorException { OpTreeSignature filterSig = pm.lookup(OpTreeSignature.class, fos.get(0)); Object pred = filterSig.getSig().getSigMap().get("getPredicateString"); assertEquals("(u = 2) (type: boolean)", pred); - } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java b/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java index ec0bc8915d..93be5a62d1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.CollectDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; @@ -71,7 +72,7 @@ * @return A list of expressions * @throws UDFArgumentException if the UDF has been formulated incorrectly */ - public abstract List 
getExpressionList() throws UDFArgumentException; + public abstract List getExpressionList() throws SemanticException; /** * This method drives the test. It takes the data from getBaseTable() and diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java index 8519a1265a..e745f31423 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java @@ -21,9 +21,8 @@ import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; -import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.testutil.BaseScalarUdfTest; import org.apache.hadoop.hive.ql.testutil.DataBuilder; @@ -63,7 +62,7 @@ } @Override - public List getExpressionList() throws UDFArgumentException { + public List getExpressionList() throws SemanticException { ExprNodeDesc expr1 = OperatorTestUtils.getStringColumn("a"); ExprNodeDesc expr2 = OperatorTestUtils.getStringColumn("b"); ExprNodeDesc exprDesc2 = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRound.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRound.java index 8c871d5500..ec7233edec 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRound.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRound.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; import org.junit.Assert; @@ -94,7 +95,7 @@ } @Override - public List getExpressionList() throws UDFArgumentException { + public List getExpressionList() throws SemanticException { List exprs = new ArrayList(cols.length); for (int i = 0; i < cols.length; i++) { exprs.add(OperatorTestUtils.getStringColumn(cols[i])); diff --git a/ql/src/test/queries/clientpositive/partition_coltype_literals.q b/ql/src/test/queries/clientpositive/partition_coltype_literals.q index e65371e8c8..9d92278e4a 100644 --- a/ql/src/test/queries/clientpositive/partition_coltype_literals.q +++ b/ql/src/test/queries/clientpositive/partition_coltype_literals.q @@ -1,4 +1,5 @@ --! 
qt:dataset:src +set hive.strict.checks.type.safety=false; set hive.stats.column.autogather=false; set hive.compute.query.using.stats=false; drop table if exists partcoltypenum; diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out index cd7681c50b..183cc4f8be 100644 --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -160,7 +160,7 @@ POSTHOOK: Input: default@alter_coltype #### A masked pattern was here #### OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alter_coltype` -WHERE `ts` = 3.0 AND `dt` = 100 +WHERE `ts` = 3 AND `dt` = 100 STAGE DEPENDENCIES: Stage-0 is a root stage diff --git a/ql/src/test/results/clientpositive/avrotblsjoin.q.out b/ql/src/test/results/clientpositive/avrotblsjoin.q.out index f6579557aa..3e6969abb8 100644 --- a/ql/src/test/results/clientpositive/avrotblsjoin.q.out +++ b/ql/src/test/results/clientpositive/avrotblsjoin.q.out @@ -72,6 +72,7 @@ POSTHOOK: Output: default@table1_1 POSTHOOK: Lineage: table1_1.col1 SCRIPT [] POSTHOOK: Lineage: table1_1.col2 SCRIPT [] WARNING: Comparing a bigint and a string may result in a loss of precision. +WARNING: Comparing a bigint and a string may result in a loss of precision. Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select table1_n1.col1, table1_1.* from table1_n1 join table1_1 on table1_n1.col1=table1_1.col1 where table1_1.col1="1" PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/collision.q.out b/ql/src/test/results/clientpositive/collision.q.out index 4b889df49d..712acc2cd8 100644 --- a/ql/src/test/results/clientpositive/collision.q.out +++ b/ql/src/test/results/clientpositive/collision.q.out @@ -477,7 +477,7 @@ POSTHOOK: Input: cpn@mytable_1000_n POSTHOOK: Output: cpn_view@myview_1000_n POSTHOOK: Output: database:cpn_view CBO PLAN: -HiveProject(col0=[$0], col1=[$1], col2=[$2], col3=[$118], col4=[$3], col5=[$4], col6=[$5], col7=[$6], col8=[CASE(IS NOT NULL($7), $7, 0:DECIMAL(1, 0))], col9=[CASE(IS NOT NULL($8), $8, 0:DECIMAL(28, 0))], col10=[$9], alt_col11=[CASE(CASE(IS NOT NULL($20), =(CAST($20):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'X'), false), _UTF-16LE'Y':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", CAST($20):VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], alt_col12=[CASE(CASE(IS NOT NULL($91), =(CAST($91):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'X'), false), _UTF-16LE'Y':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", CAST($91):VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], col13=[$10], col14=[CASE(IN(trim($4), _UTF-16LE'A', _UTF-16LE'B', _UTF-16LE'C'), CASE(IS NOT NULL($62), $62, -1:DECIMAL(18, 0)), CASE(IS NOT NULL($62), $62, 1:DECIMAL(18, 0)))], col15=[$11], col16=[$12], col17=[$13], col18=[$14], col19=[$15], col20=[$17], col21=[$16], col22=[$18], col23=[$19], col11=[CASE(IS NOT NULL(trim($20)), trim($20), _UTF-16LE'Z':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], col24=[$21], col25=[$22], col26=[$23], col27=[$24], col28=[$25], col29=[$27], col30=[$28], col31=[$29], col32=[$31], col33=[$32], col34=[$33], col35=[$34], col36=[$35], col37=[$36], col38=[$37], col39=[CASE(IS NOT NULL(trim($39)), trim($39), _UTF-16LE'N':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], parent_col9=[CASE(IS NOT NULL($40), $40, -999:DECIMAL(28, 0))], col40=[CASE(IS NOT NULL($41), CAST($41):INTEGER, -1)], col41=[$42], 
col42=[$43], col43=[CASE(=(trim($72), _UTF-16LE''), _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", CASE(IS NOT NULL(trim($72)), trim($72), _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))], col44=[$83], col45=[$84], col46=[$85], col47=[$86], col48=[$87], col49=[$88], col50=[$89], col51=[$90], col12=[$91], col52=[$92], col53=[$93], col54=[$94], col55=[$95], col56=[$96], col57=[$97], col58=[CASE(IS NOT NULL($98), $98, -1)], col59=[$99], col60=[$100], col61=[$102], col62=[$103], col63=[CASE(=(trim(CAST($106):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), _UTF-16LE''), _UTF-16LE'-1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", CASE(IS NOT NULL(trim(CAST($106):VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), trim(CAST($106):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), _UTF-16LE'-1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))], col64=[$107], col65=[$105], col66=[$108], col67=[CASE(IS NOT NULL($68), $68, -999:DECIMAL(18, 0))], col68=[$109], col69=[$110], col70=[$113], col71=[$114], col72=[$30], col73=[$38], col74=[$104], col75=[$26], col76=[$101], col77=[$115], col116=[$116], col117=[$117], col78=[CASE(IS NOT NULL($56), $56, -1:DECIMAL(18, 0))], col79=[$69], col114=[$111], col115=[$112], col92=[CASE(IS NOT NULL($49), CAST($49):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'-99':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], col80=[CASE(IS NOT NULL($52), $52, -1)], col81=[$48], col93=[$50], col82=[$73], col83=[$47], col118=[$44], col84=[CASE(IS NOT NULL($74), CAST($74):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'-99':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], col85=[$64], col86=[$79], col87=[$58], col88=[CASE(IS NOT NULL($80), CAST($80):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'-99':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) +HiveProject(col0=[$0], col1=[$1], col2=[$2], col3=[$118], col4=[$3], col5=[$4], col6=[$5], col7=[$6], col8=[CASE(IS NOT NULL($7), $7, 0:DECIMAL(1, 0))], col9=[CASE(IS NOT NULL($8), $8, 0:DECIMAL(28, 0))], col10=[$9], alt_col11=[CASE(CASE(IS NOT NULL($20), =(CAST($20):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'X'), false), _UTF-16LE'Y':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", CAST($20):VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], alt_col12=[CASE(CASE(IS NOT NULL($91), =(CAST($91):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'X'), false), _UTF-16LE'Y':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", CAST($91):VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], col13=[$10], col14=[CASE(IN(trim($4), _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), CASE(IS NOT NULL($62), $62, -1:DECIMAL(18, 0)), CASE(IS NOT NULL($62), $62, 1:DECIMAL(18, 0)))], col15=[$11], col16=[$12], col17=[$13], col18=[$14], col19=[$15], col20=[$17], col21=[$16], col22=[$18], col23=[$19], col11=[CASE(IS NOT NULL(trim($20)), trim($20), _UTF-16LE'Z':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], col24=[$21], col25=[$22], col26=[$23], col27=[$24], col28=[$25], col29=[$27], col30=[$28], col31=[$29], col32=[$31], col33=[$32], col34=[$33], col35=[$34], col36=[$35], col37=[$36], col38=[$37], col39=[CASE(IS NOT NULL(trim($39)), trim($39), _UTF-16LE'N':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], parent_col9=[CASE(IS NOT NULL($40), $40, -999:DECIMAL(28, 0))], col40=[CASE(IS NOT NULL($41), CAST($41):INTEGER, -1)], col41=[$42], col42=[$43], col43=[CASE(=(trim($72), _UTF-16LE''), 
_UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", CASE(IS NOT NULL(trim($72)), trim($72), _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))], col44=[$83], col45=[$84], col46=[$85], col47=[$86], col48=[$87], col49=[$88], col50=[$89], col51=[$90], col12=[$91], col52=[$92], col53=[$93], col54=[$94], col55=[$95], col56=[$96], col57=[$97], col58=[CASE(IS NOT NULL($98), $98, -1)], col59=[$99], col60=[$100], col61=[$102], col62=[$103], col63=[CASE(=(trim(CAST($106):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), _UTF-16LE''), _UTF-16LE'-1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", CASE(IS NOT NULL(trim(CAST($106):VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), trim(CAST($106):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), _UTF-16LE'-1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))], col64=[$107], col65=[$105], col66=[$108], col67=[CASE(IS NOT NULL($68), $68, -999:DECIMAL(18, 0))], col68=[$109], col69=[$110], col70=[$113], col71=[$114], col72=[$30], col73=[$38], col74=[$104], col75=[$26], col76=[$101], col77=[$115], col116=[$116], col117=[$117], col78=[CASE(IS NOT NULL($56), $56, -1:DECIMAL(18, 0))], col79=[$69], col114=[$111], col115=[$112], col92=[CASE(IS NOT NULL($49), CAST($49):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'-99':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], col80=[CASE(IS NOT NULL($52), $52, -1)], col81=[$48], col93=[$50], col82=[$73], col83=[$47], col118=[$44], col84=[CASE(IS NOT NULL($74), CAST($74):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'-99':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], col85=[$64], col86=[$79], col87=[$58], col88=[CASE(IS NOT NULL($80), CAST($80):VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'-99':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[cpn, mytable_1000_n]], table:alias=[mytable_1000_n]) PREHOOK: query: CREATE OR REPLACE VIEW cpn_view.myview_1000_n AS diff --git a/ql/src/test/results/clientpositive/constprog_cast.q.out b/ql/src/test/results/clientpositive/constprog_cast.q.out index 3f76604b17..2133921158 100644 --- a/ql/src/test/results/clientpositive/constprog_cast.q.out +++ b/ql/src/test/results/clientpositive/constprog_cast.q.out @@ -33,10 +33,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: constcasttest - filterExpr: (CAST( CAST( id AS varchar(9)) AS STRING) = '2019-11-0') (type: boolean) + filterExpr: (CAST( id AS varchar(9)) = '2019-11-0') (type: boolean) Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (CAST( CAST( id AS varchar(9)) AS STRING) = '2019-11-0') (type: boolean) + predicate: (CAST( id AS varchar(9)) = '2019-11-0') (type: boolean) Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: string), CAST( id AS varchar(10)) (type: varchar(10)) diff --git a/ql/src/test/results/clientpositive/decimal_precision2.q.out b/ql/src/test/results/clientpositive/decimal_precision2.q.out index 3ec620cb0d..3cba039026 100644 --- a/ql/src/test/results/clientpositive/decimal_precision2.q.out +++ b/ql/src/test/results/clientpositive/decimal_precision2.q.out @@ -175,7 +175,7 @@ STAGE PLANS: Row Limit Per Split: 1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 69.0212249755859375 (type: decimal(27,20)) + expressions: 69.0212249755859375 (type: decimal(29,20)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE 
ListSink diff --git a/ql/src/test/results/clientpositive/druid/druid_timeseries.q.out b/ql/src/test/results/clientpositive/druid/druid_timeseries.q.out index beb4618287..afbb2a12f6 100644 --- a/ql/src/test/results/clientpositive/druid/druid_timeseries.q.out +++ b/ql/src/test/results/clientpositive/druid/druid_timeseries.q.out @@ -31,7 +31,7 @@ STAGE PLANS: properties: druid.fieldNames $f0 druid.fieldTypes bigint - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"bound","dimension":"added","upper":"0.0","upperStrict":false,"ordering":"numeric"},{"type":"bound","dimension":"__time","lower":"2010-01-01T00:00:00.000Z","lowerStrict":false,"upper":"2012-03-01T00:00:00.000Z","upperStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} + druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"bound","dimension":"added","upper":"0","upperStrict":false,"ordering":"numeric"},{"type":"bound","dimension":"__time","lower":"2010-01-01T00:00:00.000Z","lowerStrict":false,"upper":"2012-03-01T00:00:00.000Z","upperStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Select Operator expressions: $f0 (type: bigint) diff --git a/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out b/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out index f57b6b30c8..7e3c2caada 100644 --- a/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out +++ b/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out @@ -542,7 +542,7 @@ STAGE PLANS: properties: druid.fieldNames language,c druid.fieldTypes string,double - druid.query.json {"queryType":"groupBy","dataSource":"default.druid_kafka_test","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"c","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"longSum","name":"$f1","fieldName":"added"},{"type":"longSum","name":"$f2","fieldName":"deleted"},{"type":"count","name":"$f3"}],"postAggregations":[{"type":"expression","name":"c","expression":"(-1.0 * ((CAST((\"$f1\" - \"$f2\"), 'DOUBLE') / CAST((\"$f3\" * 3), 'DOUBLE')) + CAST(\"$f2\", 'DOUBLE')))"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} + druid.query.json {"queryType":"groupBy","dataSource":"default.druid_kafka_test","granularity":"all","dimensions":[{"type":"default","dimension":"language","outputName":"language","outputType":"STRING"}],"limitSpec":{"type":"default","columns":[{"dimension":"c","direction":"descending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"longSum","name":"$f1","fieldName":"added"},{"type":"longSum","name":"$f2","fieldName":"deleted"},{"type":"count","name":"$f3"}],"postAggregations":[{"type":"expression","name":"c","expression":"(-1 * ((CAST((\"$f1\" - \"$f2\"), 'DOUBLE') / CAST((\"$f3\" * 
3), 'DOUBLE')) + CAST(\"$f2\", 'DOUBLE')))"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy Select Operator expressions: language (type: string), c (type: double) diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out index dda4be8eb1..cebec998a1 100644 --- a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out +++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -44,10 +44,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: lineitem - filterExpr: ((l_shipmode = 'RAIL') and (DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP)))) (type: boolean) + filterExpr: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean) Statistics: Num rows: 100 Data size: 19000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'RAIL') and (DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP)))) (type: boolean) + predicate: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean) Statistics: Num rows: 7 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), (UDFToDouble(l_partkey) / 1000000.0D) (type: double) diff --git a/ql/src/test/results/clientpositive/foldts.q.out b/ql/src/test/results/clientpositive/foldts.q.out index feda88c156..e995f282cf 100644 --- a/ql/src/test/results/clientpositive/foldts.q.out +++ b/ql/src/test/results/clientpositive/foldts.q.out @@ -20,7 +20,7 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint) + expressions: ctimestamp1 (type: timestamp), unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 563568 Basic stats: COMPLETE Column stats: COMPLETE Limit @@ -134,7 +134,7 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) + expressions: from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') (type: string) outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 2260992 Basic stats: COMPLETE Column stats: COMPLETE Limit diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index f7683c6d1a..4b4d57f2a0 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -57,10 +57,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((((value = 'val_400') or (value = 'val_500')) and ((key = 400) or (key = 450))) or (((value = 'val_100') or (value = 'val_200') or (value = 'val_300')) and ((key = 100) or (key = 150) or (key = 200)))) (type: boolean) + filterExpr: (((value) IN ('val_400', 'val_500') and (key) IN (400, 450)) or ((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200))) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: 
COMPLETE Filter Operator - predicate: ((((value = 'val_400') or (value = 'val_500')) and ((key = 400) or (key = 450))) or (((value = 'val_100') or (value = 'val_200') or (value = 'val_300')) and ((key = 100) or (key = 150) or (key = 200)))) (type: boolean) + predicate: (((value) IN ('val_400', 'val_500') and (key) IN (400, 450)) or ((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200))) (type: boolean) Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) @@ -69,12 +69,11 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) - Execution mode: vectorized Reduce Operator Tree: Forward Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((VALUE._col0 = 'val_100') or (VALUE._col0 = 'val_200') or (VALUE._col0 = 'val_300')) and ((KEY._col0 = 100) or (KEY._col0 = 150) or (KEY._col0 = 200))) (type: boolean) + predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -111,7 +110,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: (((VALUE._col0 = 'val_400') or (VALUE._col0 = 'val_500')) and ((KEY._col0 = 400) or (KEY._col0 = 450))) (type: boolean) + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -593,7 +592,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((((value = 'val_400') or (value = 'val_500')) and ((key = 400) or (key = 450))) or (((value = 'val_100') or (value = 'val_200') or (value = 'val_300')) and ((key = 100) or (key = 150) or (key = 200)))) (type: boolean) + predicate: (((value) IN ('val_400', 'val_500') and (key) IN (400, 450)) or ((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200))) (type: boolean) Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) @@ -602,12 +601,11 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) - Execution mode: vectorized Reduce Operator Tree: Forward Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((VALUE._col0 = 'val_100') or (VALUE._col0 = 'val_200') or (VALUE._col0 = 'val_300')) and ((KEY._col0 = 100) or (KEY._col0 = 150) or (KEY._col0 = 200))) (type: boolean) + predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -644,7 +642,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator - predicate: (((VALUE._col0 = 'val_400') or (VALUE._col0 = 'val_500')) and ((KEY._col0 = 400) or (KEY._col0 = 450))) (type: boolean) + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out b/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out index 756309cf12..7ffd0d6d7e 100644 --- a/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out +++ b/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out @@ -37,10 +37,10 @@ STAGE PLANS: $hdt$_0:src TableScan alias: src - filterExpr: ((value = 'val_105') and (key = '105')) (type: boolean) + filterExpr: ((key = '105') and (value = 'val_105')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = 'val_105') and (key = '105')) (type: boolean) + predicate: ((key = '105') and (value = 'val_105')) (type: boolean) Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/in_typecheck_char.q.out b/ql/src/test/results/clientpositive/in_typecheck_char.q.out index cded6d380e..d2fcdf48f0 100644 --- a/ql/src/test/results/clientpositive/in_typecheck_char.q.out +++ b/ql/src/test/results/clientpositive/in_typecheck_char.q.out @@ -108,13 +108,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: ax - filterExpr: (struct(s,t)) IN (const struct('a','a '), const struct('b','bb ')) (type: boolean) + filterExpr: (struct(CAST( s AS STRING),CAST( t AS STRING))) IN (const struct('a','a'), const struct('b','bb')) (type: boolean) Statistics: Num rows: 3 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (struct(s,t)) IN (const struct('a','a '), const struct('b','bb ')) (type: boolean) - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (struct(CAST( s AS STRING),CAST( t AS STRING))) IN (const struct('a','a'), const struct('b','bb')) (type: boolean) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99 @@ -126,6 +126,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -213,10 +214,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: ax - filterExpr: (((s = 'a') and (t = 'a ')) or (null and (t = 'bb '))) is null (type: boolean) + filterExpr: (((CAST( s AS STRING) = 'a') and (CAST( t AS STRING) = 'a')) or (null and (CAST( t AS STRING) = 'bb'))) is null (type: boolean) Statistics: Num rows: 3 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((s = 'a') and (t = 'a ')) or (null and (t = 'bb '))) is null (type: boolean) + predicate: (((CAST( s AS 
STRING) = 'a') and (CAST( t AS STRING) = 'a')) or (null and (CAST( t AS STRING) = 'bb'))) is null (type: boolean) Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/in_typecheck_varchar.q.out b/ql/src/test/results/clientpositive/in_typecheck_varchar.q.out index 689f50adda..2c73d020bd 100644 --- a/ql/src/test/results/clientpositive/in_typecheck_varchar.q.out +++ b/ql/src/test/results/clientpositive/in_typecheck_varchar.q.out @@ -108,10 +108,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: ax - filterExpr: (struct(s,t)) IN (const struct('a','a'), const struct('b','bb')) (type: boolean) + filterExpr: (struct(CAST( s AS STRING),CAST( t AS STRING))) IN (const struct('a','a'), const struct('b','bb')) (type: boolean) Statistics: Num rows: 3 Data size: 513 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (struct(s,t)) IN (const struct('a','a'), const struct('b','bb')) (type: boolean) + predicate: (struct(CAST( s AS STRING),CAST( t AS STRING))) IN (const struct('a','a'), const struct('b','bb')) (type: boolean) Statistics: Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: COMPLETE @@ -126,6 +126,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/infer_const_type.q.out b/ql/src/test/results/clientpositive/infer_const_type.q.out index aacc329480..b636090081 100644 --- a/ql/src/test/results/clientpositive/infer_const_type.q.out +++ b/ql/src/test/results/clientpositive/infer_const_type.q.out @@ -29,6 +29,7 @@ POSTHOOK: Input: default@infertypes 127 32767 12345 -12345 906.0 -307.0 1234 126 32767 12345 -12345 906.0 -307.0 1234 126 32767 12345 -12345 906.0 -307.0 1.57 +WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE ti = '127' AND si = 32767 AND @@ -85,6 +86,7 @@ STAGE PLANS: Processor Tree: ListSink +WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: SELECT * FROM infertypes WHERE ti = '127' AND si = 32767 AND @@ -108,6 +110,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@infertypes #### A masked pattern was here #### 127 32767 12345 -12345 906.0 -307.0 1234 +WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR @@ -138,6 +141,7 @@ STAGE PLANS: Processor Tree: ListSink +WARNING: Comparing a bigint and a string may result in a loss of precision. 
PREHOOK: query: SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR @@ -182,10 +186,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: infertypes - filterExpr: ((ti = 127Y) or (i = -100) or (CAST( si AS decimal(5,0)) = 327)) (type: boolean) + filterExpr: ((ti = 127Y) or (si = 327S) or (i = -100)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ti = 127Y) or (i = -100) or (CAST( si AS decimal(5,0)) = 327)) (type: boolean) + predicate: ((ti = 127Y) or (si = 327S) or (i = -100)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) diff --git a/ql/src/test/results/clientpositive/llap/cbo_limit.q.out b/ql/src/test/results/clientpositive/llap/cbo_limit.q.out index 4ff88b71ec..2fb1ba14f2 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_limit.q.out @@ -179,7 +179,7 @@ HiveFilter(condition=[>($0, 1)]) HiveProject(c_int=[$0]) HiveSortLimit(fetch=[1]) HiveProject(c_int=[$2]) - HiveFilter(condition=[>($3, 1.0E0)]) + HiveFilter(condition=[>($3, 1E0)]) HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) PREHOOK: query: select c_int from (select c_int from cbo_t1 where c_float > 1.0 limit 1) subq where c_int > 1 order by c_int @@ -205,7 +205,7 @@ HiveSortLimit(fetch=[0]) HiveProject(_o__c0=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject($f0=[true]) - HiveFilter(condition=[>($3, 1.0E0)]) + HiveFilter(condition=[>($3, 1E0)]) HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) PREHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 diff --git a/ql/src/test/results/clientpositive/llap/check_constraint.q.out b/ql/src/test/results/clientpositive/llap/check_constraint.q.out index 3ef0744c7b..bc5d361859 100644 --- a/ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ b/ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -116,7 +116,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: enforce_constraint((((((((- _col0) > (- 10)) is not false and (_col1 > 10) is not false) and _col2 is not null is not false) and _col3 BETWEEN _col0 AND _col1 is not false) and ((_col4 = 23.4) or (_col4 = 56) or (_col4 = 4)) is not false) and ((_col5 > round(567.6)) and (_col5 < round(1000.4))) is not false)) (type: boolean) + predicate: enforce_constraint((((((((- _col0) > -10) is not false and (_col1 > 10) is not false) and _col2 is not null is not false) and _col3 BETWEEN _col0 AND _col1 is not false) and ((_col4) IN (23.4) or (_col4) IN (56) or (_col4) IN (4)) is not false) and ((_col5 > round(567.6)) and (_col5 < round(1000.4))) is not false)) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean), _col3 (type: int), UDFToFloat(_col4) (type: float), UDFToLong(_col5) (type: bigint) @@ -3382,7 +3382,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: enforce_constraint(((null = 4) or (null = 5)) is not false) 
(type: boolean) + predicate: enforce_constraint((null) IN (4, 5) is not false) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(_col0) (type: tinyint), _col1 (type: bigint) @@ -3467,7 +3467,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: enforce_constraint(((_col1 = 4) or (_col1 = 5)) is not false) (type: boolean) + predicate: enforce_constraint((_col1) IN (4, 5) is not false) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), UDFToLong(_col1) (type: bigint) diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index 19238bc173..4e4f087c8e 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -2581,7 +2581,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0]) - HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($1, 0)]) HiveAggregate(group=[{1}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 07f0fcdc90..7f294196bf 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -1282,10 +1282,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -1446,10 +1446,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') 
and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -2763,10 +2763,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -2898,10 +2898,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -5108,10 +5108,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) diff --git a/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out b/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out index 082b244a49..1200e30dc1 100644 --- a/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out +++ b/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out @@ -37,10 +37,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((value = 'val_105') and (key = '105')) (type: boolean) + filterExpr: ((key = '105') and (value = 'val_105')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = 'val_105') and (key = '105')) (type: boolean) + predicate: ((key = '105') and (value = 'val_105')) (type: boolean) Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out index 7f7df56412..567ee46b9b 100644 --- a/ql/src/test/results/clientpositive/llap/lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out @@ -18,7 +18,7 @@ PREHOOK: query: select * from src1 where key > 10 and value > 'val' order by key PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"e07e602503383cf2b8477d43c5043f35","queryText":"select * from src1 where key > 10 and value > 'val' order by key limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[3,2],"targets":[0,1],"expression":"((src1.value > 'val') and (UDFToDouble(src1.key) > 10.0D))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"src1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"src1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e07e602503383cf2b8477d43c5043f35","queryText":"select * from src1 where key > 10 and value > 'val' order by key limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0,1],"expression":"((UDFToDouble(src1.key) > 10.0D) and (src1.value > 'val'))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"src1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"src1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} 146 val_146 150 val_150 213 val_213 @@ -503,7 +503,7 @@ PREHOOK: query: select * from src1 where length(key) > 2 and value > 'a' PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"f4a6b14cf6ce3c1313d70720cea4e8b3","queryText":"select * from src1 where length(key) > 2 and value > 'a'","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[3,2],"targets":[0,1],"expression":"((src1.value > 'a') and (length(src1.key) > 2))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"src1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"src1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f4a6b14cf6ce3c1313d70720cea4e8b3","queryText":"select * from src1 where length(key) > 2 and value > 'a'","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0,1],"expression":"((length(src1.key) > 2) and (src1.value > 'a'))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"src1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"src1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} 238 val_238 311 val_311 255 val_255 diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out index 
a6aca8f2b3..f8190afee9 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -111,15 +111,15 @@ POSTHOOK: query: insert into orc_ppd_staging_n0 select -10,-321,-65680,-42949674 POSTHOOK: type: QUERY POSTHOOK: Input: default@staging_n6 POSTHOOK: Output: default@orc_ppd_staging_n0 -POSTHOOK: Lineage: orc_ppd_staging_n0.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n0.b SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n0.bin EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n0.bo SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n0.c EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n0.d EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n0.da EXPRESSION [] -POSTHOOK: Lineage: orc_ppd_staging_n0.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n0.dec SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n0.f EXPRESSION [] -POSTHOOK: Lineage: orc_ppd_staging_n0.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n0.i SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n0.s SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n0.si EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n0.t EXPRESSION [] @@ -920,16 +920,16 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 0 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 0 - RECORDS_OUT_OPERATOR_FS_12: 1 - RECORDS_OUT_OPERATOR_GBY_11: 1 + RECORDS_OUT_OPERATOR_FIL_7: 0 + RECORDS_OUT_OPERATOR_FS_9: 1 + RECORDS_OUT_OPERATOR_GBY_8: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 - RECORDS_OUT_OPERATOR_RS_10: 0 - RECORDS_OUT_OPERATOR_SEL_9: 0 + RECORDS_OUT_OPERATOR_RS_3: 0 + RECORDS_OUT_OPERATOR_SEL_2: 0 RECORDS_OUT_OPERATOR_TS_0: 2100 TOTAL_TABLE_ROWS_WRITTEN: 0 Stage-1 LLAP IO COUNTERS: - CACHE_HIT_BYTES: 354 + CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 METADATA_CACHE_HIT: 2 NUM_DECODED_BATCHES: 3 diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out index 53e18a59f4..d37aa3c817 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out @@ -111,15 +111,15 @@ POSTHOOK: query: insert into orc_ppd_staging select -10,-321,-65680,-4294967430, POSTHOOK: type: QUERY POSTHOOK: Input: default@staging POSTHOOK: Output: default@orc_ppd_staging -POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] -POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] -POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out index 1aa1203b52..adb161778a 100644 --- a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out @@ -111,15 +111,15 @@ POSTHOOK: query: insert into orc_ppd_staging_n1 select -10,-321,-65680,-42949674 
POSTHOOK: type: QUERY POSTHOOK: Input: default@staging_n7 POSTHOOK: Output: default@orc_ppd_staging_n1 -POSTHOOK: Lineage: orc_ppd_staging_n1.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n1.b SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n1.bin EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n1.bo SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n1.c EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n1.d EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n1.da EXPRESSION [] -POSTHOOK: Lineage: orc_ppd_staging_n1.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n1.dec SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n1.f EXPRESSION [] -POSTHOOK: Lineage: orc_ppd_staging_n1.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n1.i SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n1.s SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n1.si EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n1.t EXPRESSION [] @@ -881,7 +881,7 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_TS_0: 2100 TOTAL_TABLE_ROWS_WRITTEN: 0 Stage-1 LLAP IO COUNTERS: - CACHE_HIT_BYTES: 354 + CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 METADATA_CACHE_HIT: 2 NUM_DECODED_BATCHES: 3 diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out index b1679debee..59ae15c26a 100644 --- a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out @@ -111,15 +111,15 @@ POSTHOOK: query: insert into orc_ppd_staging_n2 select -10,-321,-65680,-42949674 POSTHOOK: type: QUERY POSTHOOK: Input: default@staging_n8 POSTHOOK: Output: default@orc_ppd_staging_n2 -POSTHOOK: Lineage: orc_ppd_staging_n2.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n2.b SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n2.bin EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n2.bo SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n2.c EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n2.d EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n2.da EXPRESSION [] -POSTHOOK: Lineage: orc_ppd_staging_n2.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n2.dec SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n2.f EXPRESSION [] -POSTHOOK: Lineage: orc_ppd_staging_n2.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging_n2.i SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n2.s SIMPLE [] POSTHOOK: Lineage: orc_ppd_staging_n2.si EXPRESSION [] POSTHOOK: Lineage: orc_ppd_staging_n2.t EXPRESSION [] diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index 18f70927d8..7bfe64c875 100644 --- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -339,15 +339,15 @@ STAGE PLANS: alias: orc_pred Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((t < 0Y) and (t > -2Y)) (type: boolean) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
aggregations: sum(_col0) - minReductionHashAggr: 0.875 + minReductionHashAggr: 0.96 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -410,18 +410,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - filterExpr: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) + filterExpr: ((t < 0Y) and (t > -2Y)) (type: boolean) Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((t < 0Y) and (t > -2Y)) (type: boolean) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) - minReductionHashAggr: 0.875 + minReductionHashAggr: 0.96 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -511,7 +511,7 @@ STAGE PLANS: TableScan alias: orc_pred Filter Operator - predicate: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + predicate: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Select Operator expressions: -1Y (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 @@ -541,9 +541,9 @@ STAGE PLANS: Processor Tree: TableScan alias: orc_pred - filterExpr: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + filterExpr: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Filter Operator - predicate: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + predicate: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Select Operator expressions: -1Y (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 @@ -627,7 +627,7 @@ STAGE PLANS: alias: orc_pred Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) @@ -695,10 +695,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not 
null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index da94fef4a6..83da3258e3 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -344,7 +344,7 @@ STAGE PLANS: TableScan alias: tbl_pred Filter Operator - predicate: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) + predicate: ((t < 0Y) and (t > -2Y)) (type: boolean) Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -378,9 +378,9 @@ STAGE PLANS: Processor Tree: TableScan alias: tbl_pred - filterExpr: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) + filterExpr: ((t < 0Y) and (t > -2Y)) (type: boolean) Filter Operator - predicate: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) + predicate: ((t < 0Y) and (t > -2Y)) (type: boolean) Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -445,7 +445,7 @@ STAGE PLANS: TableScan alias: tbl_pred Filter Operator - predicate: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + predicate: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Select Operator expressions: -1Y (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 @@ -475,9 +475,9 @@ STAGE PLANS: Processor Tree: TableScan alias: tbl_pred - filterExpr: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + filterExpr: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Filter Operator - predicate: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + predicate: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Select Operator expressions: -1Y (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 @@ -561,7 +561,7 @@ STAGE PLANS: alias: tbl_pred Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) @@ -629,10 +629,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: 
tbl_pred - filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) diff --git a/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out b/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out index 4a957f57c0..013b951e6a 100644 --- a/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out +++ b/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out @@ -306,13 +306,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: rct_part - filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) = 0.0D)) (type: boolean) + filterExpr: ((UDFToDouble(key) = 0.0D) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ds = '2008-04-08') and (UDFToDouble(key) = 0.0D)) (type: boolean) - Statistics: Num rows: 2 Data size: 700 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) = 0.0D) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 3 Data size: 1051 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2 Data size: 700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1051 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() minReductionHashAggr: 0.99 diff --git a/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out index 773ed8093f..d05e7c7621 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out @@ -367,7 +367,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 0 -Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select count(*) from part where p_size < ALL (select max(null) from part group by p_partkey) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out index 96ca2d95ac..8fa69c5aaf 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out @@ -276,7 +276,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 25 -Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, 
$hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select count(*) from part where p_size < ANY (select max(null) from part group by p_partkey) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out b/ql/src/test/results/clientpositive/llap/subquery_views.q.out index a00813c8cc..88c63ae091 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -124,7 +124,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - filterExpr: ((key < '11') or ((key < '11') and (value > 'val_11')) or ((key < '11') and (value > 'val_11'))) (type: boolean) + filterExpr: ((key < '11') or ((value > 'val_11') and (key < '11')) or ((value > 'val_11') and (key < '11'))) (type: boolean) properties: insideView TRUE Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -142,7 +142,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and (value > 'val_11')) (type: boolean) + predicate: ((value > 'val_11') and (key < '11')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(key) @@ -159,7 +159,7 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 5238 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Filter Operator - predicate: ((key < '11') and (value > 'val_11')) (type: boolean) + predicate: ((value > 'val_11') and (key < '11')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: string), value (type: string) @@ -187,7 +187,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and (value > 'val_11')) (type: boolean) + predicate: ((value > 'val_11') and (key < '11')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(key) diff --git a/ql/src/test/results/clientpositive/llap/union_assertion_type.q.out b/ql/src/test/results/clientpositive/llap/union_assertion_type.q.out index 4c9e94ece3..b1b5ddce85 100644 --- a/ql/src/test/results/clientpositive/llap/union_assertion_type.q.out +++ b/ql/src/test/results/clientpositive/llap/union_assertion_type.q.out @@ -142,10 +142,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), '5' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -167,10 +167,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), '5' (type: string) 
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out index b8d480afc7..406096af27 100644 --- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -269,7 +269,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - filterExpr: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + filterExpr: ((cdecimal1) IN (2365.8945945946, -3367.6517567568) or (cdecimal1) IN (881.0135135135)) (type: boolean) Statistics: Num rows: 12289 Data size: 1027600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -277,8 +277,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterDecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) - predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterDecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, -3367.6517567568]), FilterDecimalColumnInList(col 1:decimal(20,10), values [881.0135135135])) + predicate: ((cdecimal1) IN (2365.8945945946, -3367.6517567568) or (cdecimal1) IN (881.0135135135)) (type: boolean) Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdecimal1 (type: decimal(20,10)) @@ -1300,13 +1300,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + expressions: ((cdecimal1) IN (2365.8945945946, -3367.6517567568) or (cdecimal1) IN (881.0135135135)) (type: boolean) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean + projectedOutputColumnNums: [7] + selectExpressions: ColOrCol(col 5:boolean, col 6:boolean)(children: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, -3367.6517567568]) -> 5:boolean, DecimalColumnInList(col 1:decimal(20,10), values [881.0135135135]) -> 6:boolean) -> 7:boolean Statistics: Num rows: 12289 Data size: 1027600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -1314,15 +1314,15 @@ STAGE PLANS: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:boolean + keyExpressions: col 7:boolean native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) - minReductionHashAggr: 0.52225566 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5871 
Data size: 70452 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -1332,7 +1332,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5871 Data size: 70452 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1367,7 +1367,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5871 Data size: 70452 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -1376,7 +1376,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5871 Data size: 70452 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1394,13 +1394,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 5871 Data size: 70452 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5871 Data size: 70452 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index cc72f4546a..7e58042040 100644 --- a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -1131,13 +1131,13 @@ STAGE PLANS: keyExpressions: col 0:int native: true Select Operator - expressions: q548284 (type: int), CASE WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END (type: decimal(2,1)) + expressions: q548284 (type: int), CAST( CASE WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END AS decimal(11,1)) (type: decimal(11,1)) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 8] - selectExpressions: IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(2,1)col 7:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, 
ConstantVectorExpression(val 0.8) -> 3:decimal(2,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(2,1)col 6:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(2,1), ConstantVectorExpression(val 8) -> 6:decimal(2,1)) -> 7:decimal(2,1)) -> 8:decimal(2,1) + projectedOutputColumnNums: [0, 9] + selectExpressions: CastDecimalToDecimal(col 8:decimal(2,1))(children: IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(2,1)col 7:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, ConstantVectorExpression(val 0.8) -> 3:decimal(2,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(2,1)col 6:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(2,1), ConstantVectorExpression(val 8) -> 6:decimal(2,1)) -> 7:decimal(2,1)) -> 8:decimal(2,1)) -> 9:decimal(11,1) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -1148,10 +1148,10 @@ STAGE PLANS: keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 8:decimal(2,1) + valueColumns: 9:decimal(11,1) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: decimal(2,1)) + value expressions: _col1 (type: decimal(11,1)) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -1168,7 +1168,7 @@ STAGE PLANS: includeColumns: [0] dataColumns: q548284:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(2,1), bigint, decimal(2,1), decimal(2,1), decimal(2,1), decimal(2,1)] + scratchColumnTypeNames: [bigint, decimal(2,1), bigint, decimal(2,1), decimal(2,1), decimal(2,1), decimal(2,1), decimal(11,1)] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1181,12 +1181,12 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(2,1) + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(11,1) partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(2,1)) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(11,1)) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator diff --git a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out index 03e277a15b..ae7bd5b3ca 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out @@ -67,7 +67,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + filterExpr: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 
Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -76,24 +76,24 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimal64ColEqualDecimal64Scalar(col 15:decimal(11,4)/DECIMAL_64, val 97632155639)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(11,4)/DECIMAL_64), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) - predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) + Statistics: Num rows: 11590 Data size: 2232584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] - selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, 
DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 8, 0, 10, 6, 15, 17, 20, 21, 23, 24, 25, 27, 30, 32] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 19:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 1:smallint) -> 18:double) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, DoubleColModuloDoubleColumn(col 22:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 22:double) -> 23:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 24:smallint, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 3:bigint, col 26:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 26:smallint) -> 27:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 29:double)(children: DoubleColAddDoubleColumn(col 5:double, col 28:double)(children: CastLongToDouble(col 1:smallint) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleColUnaryMinus(col 5:double) -> 31:double) -> 32:double + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -114,7 +114,7 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, decimal(11,4)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double] + scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double] Stage: Stage-0 Fetch Operator diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 02c9d660ba..72cacf868e 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -1689,7 +1689,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) + filterExpr: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -1697,14 +1697,14 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 3:bigint) -> 13:float), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 14:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 14:decimal(7,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 15:double), FilterStringGroupColGreaterEqualStringScalar(col 6:string, val ss), FilterDoubleColNotEqualDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 16:double)), FilterLongColEqualLongScalar(col 0:int, val -89010)(children: col 0:tinyint), FilterExprAndExpr(children: FilterDecimal64ColGreaterDecimal64Scalar(col 17:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 17:decimal(7,2)/DECIMAL_64), FilterStringColLikeStringScalar(col 7:string, pattern ss))) - predicate: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) - Statistics: Num rows: 10922 Data size: 2201730 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 3:bigint) -> 13:float), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 14:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 14:decimal(7,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 15:double), FilterStringGroupColGreaterEqualStringScalar(col 6:string, val ss), FilterDoubleColNotEqualDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 16:double)), FilterExprAndExpr(children: FilterDecimal64ColGreaterDecimal64Scalar(col 17:decimal(7,2)/DECIMAL_64, val 
-2628)(children: CastLongToDecimal64(col 1:smallint) -> 17:decimal(7,2)/DECIMAL_64), FilterStringColLikeStringScalar(col 7:string, pattern ss))) + predicate: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) + Statistics: Num rows: 4778 Data size: 963360 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++++++++++++ keys: cboolean1 (type: boolean), cstring1 (type: string), ctimestamp2 (type: timestamp), cfloat (type: float), cbigint (type: bigint), cdouble (type: double), cint (type: int), csmallint (type: smallint), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) null sort order: zzzzzzzzzzzzzzzzzzzzz - Statistics: Num rows: 10922 Data size: 2201730 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778 Data size: 963360 Basic stats: COMPLETE Column stats: COMPLETE top n: 75 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -1718,7 +1718,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 6, 11, 9, 5, 4, 3, 1, 10, 22, 26, 27, 13, 39, 15, 16, 41, 34, 42, 30, 37, 44] selectExpressions: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 22:int, LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 26:bigint, LongColUnaryMinus(col 3:bigint) -> 27:bigint, DoubleColUnaryMinus(col 4:float) -> 13:float, LongColAddLongColumn(col 37:bigint, col 3:bigint)(children: LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 37:bigint) -> 39:bigint, DoubleColDivideDoubleColumn(col 5:double, col 5:double) -> 15:double, DoubleColUnaryMinus(col 5:double) -> 16:double, LongColMultiplyLongColumn(col 37:bigint, col 40:bigint)(children: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 37:int, LongColUnaryMinus(col 3:bigint) -> 40:bigint) -> 41:bigint, DoubleColAddDoubleColumn(col 29:double, col 30:double)(children: DoubleColUnaryMinus(col 5:double) -> 29:double, CastLongToDouble(col 3:bigint) -> 30:double) -> 34:double, DecimalScalarDivideDecimalColumn(val -1.389, col 32:decimal(3,0))(children: CastLongToDecimal(col 0:tinyint) -> 32:decimal(3,0)) -> 42:decimal(8,7), DoubleColModuloDoubleColumn(col 29:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 29:double) -> 30:double, LongColUnaryMinus(col 1:smallint) -> 37:smallint, LongColAddLongColumn(col 1:int, col 43:int)(children: col 1:smallint, LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 43:int) -> 44:int - Statistics: Num rows: 10922 Data size: 3012774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778 Data size: 1318066 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), 
_col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) null sort order: zzzzzzzzzzzzzzzzzzzzz @@ -1727,7 +1727,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 10922 Data size: 3012774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778 Data size: 1318066 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized, llap @@ -1757,7 +1757,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6, 1, 21, 2, 5, 3, 4, 7, 0, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 10922 Data size: 3012774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778 Data size: 1318066 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 75 Limit Vectorization: @@ -2009,7 +2009,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (UDFToInteger(csmallint) < -6432)) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) + filterExpr: (((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (csmallint < -6432S)) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) Statistics: Num rows: 12288 Data size: 2403694 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -2017,14 +2017,14 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5:double, col 4:double)(children: col 4:float), FilterStringGroupColLessEqualStringScalar(col 7:string, val a)), FilterExprAndExpr(children: FilterDecimal64ColLessEqualDecimal64Scalar(col 13:decimal(13,3)/DECIMAL_64, val -1389)(children: CastLongToDecimal64(col 2:int) -> 13:decimal(13,3)/DECIMAL_64), FilterLongColLessLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), FilterLongColLessLongScalar(col 1:int, val -6432)(children: col 1:smallint)), FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern ss%), FilterDecimalColLessDecimalScalar(col 14:decimal(22,3), val 10.175)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)))) - predicate: (((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (UDFToInteger(csmallint) < -6432)) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) - Statistics: Num rows: 3868 Data size: 756762 Basic stats: 
COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimal64ColLessEqualDecimal64Scalar(col 13:decimal(13,3)/DECIMAL_64, val -1389)(children: CastLongToDecimal64(col 2:int) -> 13:decimal(13,3)/DECIMAL_64), FilterLongColLessLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), FilterLongColLessLongScalar(col 1:smallint, val -6432)), FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5:double, col 4:double)(children: col 4:float), FilterStringGroupColLessEqualStringScalar(col 7:string, val a)), FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern ss%), FilterDecimalColLessDecimalScalar(col 14:decimal(22,3), val 10.175)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)))) + predicate: (((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (csmallint < -6432S)) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) + Statistics: Num rows: 3828 Data size: 749058 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++++++ keys: csmallint (type: smallint), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), (UDFToDouble(cbigint) / 3569.0D) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175D) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) null sort order: zzzzzzzzzzzzzzz - Statistics: Num rows: 3868 Data size: 756762 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3828 Data size: 749058 Basic stats: COMPLETE Column stats: COMPLETE top n: 45 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -2038,7 +2038,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 7, 5, 4, 3, 1, 21, 28, 15, 26, 29, 31, 32, 33, 34, 36] selectExpressions: DoubleColDivideDoubleScalar(col 15:double, val 3569.0)(children: CastLongToDouble(col 3:bigint) -> 15:double) -> 21:double, LongScalarSubtractLongColumn(val -257, col 1:int)(children: col 1:smallint) -> 28:int, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4:float) -> 15:float, DoubleColUnaryMinus(col 5:double) -> 26:double, DoubleColMultiplyDoubleScalar(col 5:double, val 10.175) -> 29:double, DoubleColDivideDoubleColumn(col 30:double, col 4:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4:float) -> 30:float, col 4:float) -> 31:double, DoubleColUnaryMinus(col 4:float) -> 32:float, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 33:int, DoubleColUnaryMinus(col 5:double) -> 34:double, DoubleColMultiplyDoubleColumn(col 5:double, col 35:double)(children: DoubleColUnaryMinus(col 5:double) -> 35:double) -> 36:double - Statistics: Num rows: 3868 Data size: 552696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3828 Data size: 547232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: 
float), _col13 (type: int), _col14 (type: double), _col15 (type: double) null sort order: zzzzzzzzzzzzzzz @@ -2047,7 +2047,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3868 Data size: 552696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3828 Data size: 547232 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: timestamp) Execution mode: vectorized, llap @@ -2077,7 +2077,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14] - Statistics: Num rows: 3868 Data size: 552696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3828 Data size: 547232 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 45 Limit Vectorization: @@ -2271,7 +2271,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((UDFToInteger(ctinyint) <= cint) and (UDFToInteger(csmallint) >= -257) and (UDFToDouble(cint) >= cdouble)) (type: boolean) + filterExpr: ((csmallint >= -257S) and (UDFToInteger(ctinyint) <= cint) and (UDFToDouble(cint) >= cdouble)) (type: boolean) Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -2279,14 +2279,14 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterLongColGreaterEqualLongScalar(col 1:int, val -257)(children: col 1:smallint), FilterDoubleColGreaterEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double)) - predicate: ((UDFToInteger(ctinyint) <= cint) and (UDFToInteger(csmallint) >= -257) and (UDFToDouble(cint) >= cdouble)) (type: boolean) - Statistics: Num rows: 455 Data size: 9548 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1:smallint, val -257), FilterLongColLessEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double)) + predicate: ((csmallint >= -257S) and (UDFToInteger(ctinyint) <= cint) and (UDFToDouble(cint) >= cdouble)) (type: boolean) + Statistics: Num rows: 693 Data size: 14504 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: + keys: csmallint (type: smallint) null sort order: z - Statistics: Num rows: 455 Data size: 9548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 14504 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -2300,7 +2300,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 0, 13, 16, 17, 20] selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 1:smallint) -> 14:double, CastLongToDouble(col 1:smallint) -> 15:double) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, 
DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 455 Data size: 9548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 14504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col4), sum(_col3), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count() Group By Vectorization: @@ -2312,10 +2312,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) - minReductionHashAggr: 0.49890107 + minReductionHashAggr: 0.53679657 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 227 Data size: 14980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) null sort order: z @@ -2325,7 +2325,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 227 Data size: 14980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2360,12 +2360,12 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 227 Data size: 14980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++ keys: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col4 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col4)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), ((_col5 - ((_col6 * _col6) / _col7)) / _col7) (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col8 (type: bigint), (_col8 - -89010L) (type: bigint) null sort order: zzzzzzzzzzz - Statistics: Num rows: 227 Data size: 14980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -2379,7 +2379,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 15, 10, 34, 4, 16, 31, 12, 30, 8, 36] selectExpressions: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 15:int, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 30:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 10:double) -> 11:double) -> 10:double, IfExprNullCondExpr(col 20:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 20:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 30:bigint) -> 11:double) -> 10:double, DecimalScalarDivideDecimalColumn(val -1.389, col 18:decimal(5,0))(children: CastLongToDecimal(col 0:smallint) -> 18:decimal(5,0)) -> 34:decimal(10,9), DoubleColDivideDoubleColumn(col 11:double, col 12:double)(children: CastLongToDouble(col 30:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 30:int) -> 11:double, CastLongToDouble(col 4:bigint) -> 12:double) -> 16:double, LongColUnaryMinus(col 30:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 30:int) -> 31:int, DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 12:double)(children: DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 11:double) -> 12:double) -> 11:double) -> 12:double, LongColUnaryMinus(col 35:int)(children: LongColUnaryMinus(col 30:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 30:int) -> 35:int) -> 30:int, LongColSubtractLongScalar(col 8:bigint, val -89010) -> 36:bigint - Statistics: Num rows: 227 Data size: 39036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 55196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) null sort order: zzzzzzzzzzz @@ -2388,7 +2388,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 227 Data size: 39036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 55196 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -2406,7 +2406,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 227 Data size: 39036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 55196 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -2576,30 +2576,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (CAST( ctinyint AS decimal(6,2)) = 2563.58) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 293580 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS 
decimal(21,2)) < -5638.15)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 2563.58), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3:bigint, col 2:bigint)(children: col 2:int), FilterLongColLessLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterDoubleColLessDoubleScalar(col 4:float, val -5638.14990234375)), FilterDecimal64ColEqualDecimal64Scalar(col 13:decimal(6,2)/DECIMAL_64, val 256358)(children: CastLongToDecimal64(col 0:tinyint) -> 13:decimal(6,2)/DECIMAL_64), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 14:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(21,2), val -5638.15)(children: CastLongToDecimal(col 3:bigint) -> 15:decimal(21,2))))) - predicate: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (CAST( ctinyint AS decimal(6,2)) = 2563.58) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) - Statistics: Num rows: 7494 Data size: 179052 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 2563.58), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3:bigint, col 2:bigint)(children: col 2:int), FilterLongColLessLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterDoubleColLessDoubleScalar(col 4:float, val -5638.14990234375)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 3:bigint) -> 13:double), FilterDecimalColLessDecimalScalar(col 14:decimal(21,2), val -5638.15)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(21,2))))) + predicate: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) + Statistics: Num rows: 1362 Data size: 28504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cfloat (type: float), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 4, 16] - selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 16:double - Statistics: Num rows: 7494 Data size: 179052 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 4, 15] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 15:double + Statistics: Num rows: 1362 Data size: 28504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col0), count(_col0), count(_col1), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double + aggregators: VectorUDAFSumDouble(col 
15:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double @@ -2607,10 +2607,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) - minReductionHashAggr: 0.55004 + minReductionHashAggr: 0.54919237 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3372 Data size: 155032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 28232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z @@ -2620,7 +2620,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3372 Data size: 155032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 28232 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2655,7 +2655,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3372 Data size: 155032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 28232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double), (2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), _col4 (type: bigint), ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D) (type: double), ((- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) * ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D)) (type: double), _col5 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (_col0 - (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END))) (type: double), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (_col0 + ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (_col0 * 762.0D) (type: double), _col2 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2664,7 +2664,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 12, 20, 28, 4, 37, 55, 5, 59, 68, 73, 81, 82, 2, 84] selectExpressions: DoubleColDivideLongColumn(col 8:double, col 11:bigint)(children: 
DoubleColSubtractDoubleColumn(col 1:double, col 7:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 7:double) -> 8:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 12:double, DoubleScalarMultiplyDoubleColumn(val 2563.58, col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 13:double) -> 14:double) -> 15:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 27:double)(children: DoubleColDivideLongColumn(col 23:double, col 26:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 22:double)(children: DoubleColDivideLongColumn(col 21:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 21:double) -> 22:double) -> 23:double, IfExprNullCondExpr(col 24:boolean, null, col 25:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 24:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 25:bigint) -> 26:bigint) -> 27:double) -> 28:double, DoubleColAddDoubleScalar(col 36:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 35:double)(children: DoubleColDivideLongColumn(col 31:double, col 34:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 29:double) -> 30:double) -> 31:double, IfExprNullCondExpr(col 32:boolean, null, col 33:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 32:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 33:bigint) -> 34:bigint) -> 35:double) -> 36:double) -> 37:double, DoubleColMultiplyDoubleColumn(col 45:double, col 54:double)(children: DoubleColUnaryMinus(col 44:double)(children: DoubleColDivideLongColumn(col 40:double, col 43:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 38:double) -> 39:double) -> 40:double, IfExprNullCondExpr(col 41:boolean, null, col 42:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 41:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 42:bigint) -> 43:bigint) -> 44:double) -> 45:double, DoubleColAddDoubleScalar(col 53:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 52:double)(children: DoubleColDivideLongColumn(col 48:double, col 51:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 47:double)(children: DoubleColDivideLongColumn(col 46:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 46:double) -> 47:double) -> 48:double, IfExprNullCondExpr(col 49:boolean, null, col 50:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 49:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 50:bigint) 
-> 51:bigint) -> 52:double) -> 53:double) -> 54:double) -> 55:double, DoubleColDivideLongColumn(col 58:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 57:double)(children: DoubleColDivideLongColumn(col 56:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 56:double) -> 57:double) -> 58:double) -> 59:double, DoubleColSubtractDoubleColumn(col 0:double, col 67:double)(children: DoubleColUnaryMinus(col 66:double)(children: DoubleColDivideLongColumn(col 62:double, col 65:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 61:double)(children: DoubleColDivideLongColumn(col 60:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 60:double) -> 61:double) -> 62:double, IfExprNullCondExpr(col 63:boolean, null, col 64:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 63:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 64:bigint) -> 65:bigint) -> 66:double) -> 67:double) -> 68:double, FuncPowerDoubleToDouble(col 72:double)(children: DoubleColDivideLongColumn(col 71:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 70:double)(children: DoubleColDivideLongColumn(col 69:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 69:double) -> 70:double) -> 71:double) -> 72:double) -> 73:double, DoubleColAddDoubleColumn(col 0:double, col 80:double)(children: DoubleColDivideLongColumn(col 76:double, col 79:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 75:double)(children: DoubleColDivideLongColumn(col 74:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 74:double) -> 75:double) -> 76:double, IfExprNullCondExpr(col 77:boolean, null, col 78:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 77:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 78:bigint) -> 79:bigint) -> 80:double) -> 81:double, DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 82:double, DoubleScalarModuloDoubleColumn(val -863.257, col 83:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 83:double) -> 84:double - Statistics: Num rows: 3372 Data size: 424792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 77352 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z @@ -2673,7 +2673,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3372 Data size: 424792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 77352 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -2691,13 +2691,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
13] - Statistics: Num rows: 3372 Data size: 424792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 77352 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3372 Data size: 424792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 77352 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index d7f8f73b9b..a2d437bd9f 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -1180,10 +1180,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -1375,10 +1375,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -2968,10 +2968,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -3134,10 +3134,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds 
is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -5644,10 +5644,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) diff --git a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out index bdb876e618..7b2bc2ed64 100644 --- a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -239,10 +239,10 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan alias: b - filterExpr: ((id = 'Id_2') and (val = 'val_104')) (type: boolean) + filterExpr: ((val = 'val_104') and (id = 'Id_2')) (type: boolean) Statistics: Num rows: 2 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((id = 'Id_2') and (val = 'val_104')) (type: boolean) + predicate: ((val = 'val_104') and (id = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out index d59f9f338d..8b3541c85b 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out @@ -64,7 +64,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + filterExpr: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -72,24 +72,24 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - 
predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimal64ColEqualDecimal64Scalar(col 15:decimal(11,4)/DECIMAL_64, val 97632155639)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(11,4)/DECIMAL_64), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) - predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) + Statistics: Num rows: 11590 Data size: 2232584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] - selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 
25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 8, 0, 10, 6, 15, 17, 20, 21, 23, 24, 25, 27, 30, 32] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 19:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 1:smallint) -> 18:double) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, DoubleColModuloDoubleColumn(col 22:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 22:double) -> 23:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 24:smallint, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 3:bigint, col 26:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 26:smallint) -> 27:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 29:double)(children: DoubleColAddDoubleColumn(col 5:double, col 28:double)(children: CastLongToDouble(col 1:smallint) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleColUnaryMinus(col 5:double) -> 31:double) -> 32:double + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out index 13f4e78880..5023927b2e 100644 --- a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -235,6 +235,7 @@ POSTHOOK: Input: default@partcoltypenum POSTHOOK: Input: default@partcoltypenum@tint=110/sint=22000/bint=330000000000 #### A masked pattern was here #### 30 +WARNING: Comparing a bigint and a string may result in a loss of precision. 
PREHOOK: query: select count(1) from partcoltypenum where tint=110Y and sint=22000 and bint='330000000000' PREHOOK: type: QUERY PREHOOK: Input: default@partcoltypenum diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out index d10a0708a4..3f561cca07 100644 --- a/ql/src/test/results/clientpositive/pcs.q.out +++ b/ql/src/test/results/clientpositive/pcs.q.out @@ -1560,7 +1560,7 @@ POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### OPTIMIZED SQL: SELECT `ds` FROM `default`.`pcs_t1` -WHERE `ds` <> '2000-04-08' AND `key` = 3 OR (`ds` = '2000-04-08' OR `key` IS NOT NULL) AND `key` = 2 +WHERE `ds` <> '2000-04-08' AND `key` <> 2 AND `key` = 3 OR (`ds` = '2000-04-08' OR `key` IS NOT NULL) AND `key` = 2 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1571,12 +1571,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcs_t1 - filterExpr: (((ds <> '2000-04-08') and (key = 3)) or (((ds = '2000-04-08') or key is not null) and (key = 2))) (type: boolean) + filterExpr: (((ds <> '2000-04-08') and (key <> 2) and (key = 3)) or (((ds = '2000-04-08') or key is not null) and (key = 2))) (type: boolean) Statistics: Num rows: 60 Data size: 11280 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds <> '2000-04-08') and (key = 3)) or (((ds = '2000-04-08') or key is not null) and (key = 2))) (type: boolean) + predicate: (((ds <> '2000-04-08') and (key <> 2) and (key = 3)) or (((ds = '2000-04-08') or key is not null) and (key = 2))) (type: boolean) Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -1796,7 +1796,7 @@ POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### OPTIMIZED SQL: SELECT `ds` FROM `default`.`pcs_t1` -WHERE `key` = 3 OR ((`ds` = '2000-04-08' OR `key` IS NOT NULL) AND `key` = 2 OR `ds` <> '2000-04-08' AND `key` = 3) AND `key` + 5 > 0 +WHERE `key` = 3 OR ((`ds` = '2000-04-08' OR `key` IS NOT NULL) AND `key` = 2 OR `ds` <> '2000-04-08' AND `key` <> 2 AND `key` = 3) AND `key` + 5 > 0 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1807,12 +1807,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcs_t1 - filterExpr: ((key = 3) or (((((ds = '2000-04-08') or key is not null) and (key = 2)) or ((ds <> '2000-04-08') and (key = 3))) and ((key + 5) > 0))) (type: boolean) + filterExpr: ((key = 3) or (((((ds = '2000-04-08') or key is not null) and (key = 2)) or ((ds <> '2000-04-08') and (key <> 2) and (key = 3))) and ((key + 5) > 0))) (type: boolean) Statistics: Num rows: 60 Data size: 11280 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key = 3) or (((((ds = '2000-04-08') or key is not null) and (key = 2)) or ((ds <> '2000-04-08') and (key = 3))) and ((key + 5) > 0))) (type: boolean) + predicate: ((key = 3) or (((((ds = '2000-04-08') or key is not null) and (key = 2)) or ((ds <> '2000-04-08') and (key <> 2) and (key = 3))) and ((key + 5) > 0))) (type: boolean) Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out index e076022ffa..ef47177e98 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out @@ -144,7 +144,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5= HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer]], table:alias=[c]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) - HiveFilter(condition=[AND(IN($7, _UTF-16LE'Walker County', _UTF-16LE'Richland County', _UTF-16LE'Gaines County', _UTF-16LE'Douglas County', _UTF-16LE'Dona Ana County'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($7, _UTF-16LE'Walker County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Richland County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Gaines County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Douglas County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Dona Ana County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8]) HiveFilter(condition=[IS NOT NULL($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out index b6f3727e70..65a61d40d1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out @@ -189,7 +189,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f9=[$7]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) @@ -203,7 +203,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out index edb893eb6d..21bd652df0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out @@ -77,7 +77,7 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not 
available]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Sports':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out index 9fab0cbed0..12e68344d7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out @@ -119,8 +119,8 @@ HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[CAST(/($2, $3)):DECIMAL(11, 6)], HiveJoin(condition=[AND(=($6, $26), OR(AND($20, $21, $15, $27), AND($22, $23, $16, $28), AND($24, $25, $17, $28)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($19, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $12), AND($2, $13), AND($3, $14)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN2=[IN($8, _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN3=[IN($8, _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_ext_sales_price=[$15], 
ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100:DECIMAL(12, 2), 200:DECIMAL(12, 2))], BETWEEN9=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 300:DECIMAL(12, 2))], BETWEEN10=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 250:DECIMAL(12, 2))], BETWEEN11=[BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0))], BETWEEN12=[BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0))], BETWEEN13=[BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))]) @@ -130,7 +130,7 @@ HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[CAST(/($2, $3)):DECIMAL(11, 6)], HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'D':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($3, _UTF-16LE'4 yr Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Primary':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) HiveFilter(condition=[AND(IN($3, 3, 1), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query15.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query15.q.out index 62dab7cd32..dc760a8f9f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query15.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query15.q.out @@ -55,12 +55,12 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0], ca_zip=[$9], IN=[IN(substr($9, 1, 5), _UTF-16LE'85669', _UTF-16LE'86197', _UTF-16LE'88274', _UTF-16LE'83405', _UTF-16LE'86475', _UTF-16LE'85392', _UTF-16LE'85460', _UTF-16LE'80348', _UTF-16LE'81792')], IN3=[IN($8, _UTF-16LE'CA', _UTF-16LE'WA', _UTF-16LE'GA')]) + HiveProject(ca_address_sk=[$0], ca_zip=[$9], IN=[IN(substr($9, 1, 5), _UTF-16LE'85669':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86197':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88274':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83405':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86475':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85392':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85460':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80348':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81792':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN3=[IN($8, _UTF-16LE'CA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) 
HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_sales_price=[$2], >=[$3], d_date_sk=[$4]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_sales_price=[$21], >=[>($21, 500:DECIMAL(3, 0))]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_sales_price=[$21], >=[>($21, 500)]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(d_date_sk=[$0]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out index 1bfd9d8855..2bc16ffaf7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out @@ -89,7 +89,7 @@ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[su HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) - HiveFilter(condition=[AND(IN($25, _UTF-16LE'Ziebach County', _UTF-16LE'Levy County', _UTF-16LE'Huron County', _UTF-16LE'Franklin Parish', _UTF-16LE'Daviess County'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($25, _UTF-16LE'Ziebach County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Daviess County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) HiveProject(cs_warehouse_sk=[$14], cs_order_number=[$17]) HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($17))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query17.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query17.q.out index 302b72d243..a45f6e81f6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query17.q.out @@ -125,7 +125,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($15))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($15, _UTF-16LE'2000Q1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'2000Q2':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'2000Q3':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_return_quantity=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -133,7 +133,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($9))]) HiveTableScan(table=[[default, store_returns]], 
table:alias=[store_returns]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($15, _UTF-16LE'2000Q1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'2000Q2':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'2000Q3':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(s_store_sk=[$0], s_state=[$24]) HiveFilter(condition=[IS NOT NULL($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out index c848aea887..90648d8207 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out @@ -89,7 +89,7 @@ HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$3], sort3=[$0], dir0=[ASC], dir1=[ HiveFilter(condition=[AND(IN($12, 9, 5, 12, 4, 1, 10), IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8], ca_country=[$10]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'ND', _UTF-16LE'WI', _UTF-16LE'AL', _UTF-16LE'NC', _UTF-16LE'OK', _UTF-16LE'MS', _UTF-16LE'TN'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'ND':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'AL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NC':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OK':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MS':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'TN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(cd_demo_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out index 9aa0653d0b..2700e6a009 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out @@ -69,7 +69,7 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Sports':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out index 3822a36984..89052c6392 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out @@ -85,6 +85,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99:DECIMAL(2, 2), 1.49:DECIMAL(3, 2)), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out index a210632ae4..3092bf4d15 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out @@ -146,7 +146,7 @@ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(_o__c0=[*(0.05:DECIMAL(3, 2), CAST(/($0, $1)):DECIMAL(21, 6))]) + HiveProject(_o__c0=[*(0.05:DECIMAL(2, 2), CAST(/($0, $1)):DECIMAL(21, 6))]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(21, 6))]) HiveProject($f0=[$0], $f1=[$1]) HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out index c0f1462b22..e8e0432a6d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out @@ -76,6 +76,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0], s_state=[$24]) - HiveFilter(condition=[AND(IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($24, _UTF-16LE'SD':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'FL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'LA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'SC':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out index 5fd5f94034..f840da1fdb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out @@ -112,10 +112,10 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($6, $1)], store_q1_q2_increase=[/($9, $11)], web_q2_q3_increase=[/($4, $6)], store_q2_q3_increase=[/($13, $9)]) - HiveJoin(condition=[AND(=($8, $0), CASE(>($11, 0:DECIMAL(1, 0)), CASE($2, >(/($6, $1), /($9, $11)), false), false), CASE(>($9, 
0:DECIMAL(1, 0)), CASE($7, >(/($4, $6), /($13, $9)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($8, $0), CASE(>($11, 0), CASE($2, >(/($6, $1), /($9, $11)), false), false), CASE(>($9, 0), CASE($7, >(/($4, $6), /($13, $9)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0:DECIMAL(1, 0))]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) @@ -141,7 +141,7 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($10, 3), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0:DECIMAL(1, 0))]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out index 8419b7067a..ad831bfa23 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out @@ -180,7 +180,7 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) @@ -203,7 +203,7 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) @@ -226,7 +226,7 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) + 
HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out index 817b82f7aa..728200600c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out @@ -91,9 +91,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), OR(BETWEEN(false, $9, 1, 3), BETWEEN(false, $9, 25, 28)), OR(<=(1, $9), <=($9, 3), <=(25, $9), <=($9, 28)), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(>($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1.2), false), IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(>($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1.2), false), IN($2, _UTF-16LE'>10000':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'unknown':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County', _UTF-16LE'Fairfield County', _UTF-16LE'Jackson County', _UTF-16LE'Barrow County', _UTF-16LE'Pennington County'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($23, _UTF-16LE'Mobile County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Maverick County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Kittitas County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Fairfield County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Jackson County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Barrow County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Pennington County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query36.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query36.q.out index c2a11d5c6e..3f152fd80e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query36.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query36.q.out @@ -83,7 +83,7 @@ HiveProject(gross_margin=[$0], i_category=[$1], i_class=[$2], lochierarchy=[$3], HiveFilter(condition=[AND(=($6, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC', _UTF-16LE'AL', _UTF-16LE'GA'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($24, _UTF-16LE'SD':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'FL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'LA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'SC':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'AL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(i_item_sk=[$0], i_class=[$10], i_category=[$12]) HiveFilter(condition=[IS NOT NULL($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out index 7b01a6e8b9..a35db692b3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out @@ -274,7 +274,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f8=[$7]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) @@ -288,7 +288,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) @@ -302,7 +302,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out index 312ac30dd4..b5e27762e4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out @@ -86,6 +86,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2)), IS NOT NULL($0))]) + 
HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99:DECIMAL(2, 2), 1.49:DECIMAL(3, 2)), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query43.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query43.q.out index d017e2ceff..e82afba419 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query43.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query43.q.out @@ -56,6 +56,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) - HiveFilter(condition=[AND(=($27, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($27, -6), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out index a7491f55d8..2360931dcc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out @@ -107,7 +107,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), IN($7, 6, 0), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood', _UTF-16LE'Union', _UTF-16LE'Salem', _UTF-16LE'Highland Park'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Wildwood':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Union':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Salem':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Highland Park':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[AND(OR(=($3, 2), =($4, 1)), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out index 60d80e010f..c706a7d2c4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out @@ -155,7 +155,7 @@ HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], su HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$4], (tok_table_or_col d_moy)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[$7], rank_window_1=[$8]) - HiveFilter(condition=[AND(=($4, 2000), >($7, 0:DECIMAL(1, 0)), CASE(>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($8))]) + HiveFilter(condition=[AND(=($4, 2000), >($7, 0), CASE(>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(1, 1)), false), IS NOT NULL($8))]) HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col 
s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $1, $0, $4, $5, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) HiveAggregate(group=[{1, 2, 8, 9, 11, 12}], agg#0=[sum($6)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out index 97c9f47e38..5749c88788 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out @@ -147,8 +147,8 @@ HiveAggregate(group=[{}], agg#0=[sum($10)]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $10), AND($2, $11), AND($3, $12)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN2=[IN($8, _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN3=[IN($8, _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cd_demo_sk=[$0]) @@ -156,7 +156,7 @@ HiveAggregate(group=[{}], agg#0=[sum($10)]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], 
ss_store_sk=[$7], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0:DECIMAL(12, 2), 2000:DECIMAL(12, 2))], BETWEEN6=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 3000:DECIMAL(12, 2))], BETWEEN7=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 25000:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0)), BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0)), BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))), IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(0:DECIMAL(12, 2), $22), <=($22, 2000:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 3000:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 25000:DECIMAL(12, 2))))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($7), OR(BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0)), BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0)), BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(0:DECIMAL(12, 2), $22), <=($22, 2000:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 3000:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 25000:DECIMAL(12, 2))))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out index 99569a8431..7de6c1fd46 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out @@ -283,11 +283,11 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveAggregate(group=[{5}], agg#0=[sum($2)], agg#1=[sum($7)], agg#2=[sum($3)], agg#3=[sum($8)]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_order_number=[$13], CASE=[CASE(IS NOT NULL($14), $14, 0)], CASE3=[CASE(IS NOT NULL($15), $15, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($15, 10000:DECIMAL(5, 0)), IS NOT NULL($13), IS NOT NULL($2))]) + HiveFilter(condition=[AND(>($15, 10000), IS NOT NULL($13), IS NOT NULL($2))]) HiveTableScan(table=[[default, web_returns]], table:alias=[wr]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], CASE=[CASE(IS NOT NULL($18), $18, 0)], CASE4=[CASE(IS NOT NULL($29), $29, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0), IS NOT NULL($17), IS NOT NULL($3))]) + HiveFilter(condition=[AND(>($18, 0), >($33, 1), >($29, 0), IS NOT NULL($0), IS NOT NULL($17), IS NOT NULL($3))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) @@ -299,11 +299,11 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveAggregate(group=[{5}], agg#0=[sum($2)], agg#1=[sum($7)], agg#2=[sum($3)], agg#3=[sum($8)]) 
HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cr_item_sk=[$2], cr_order_number=[$16], CASE=[CASE(IS NOT NULL($17), $17, 0)], CASE3=[CASE(IS NOT NULL($18), $18, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($18, 10000:DECIMAL(5, 0)), IS NOT NULL($16), IS NOT NULL($2))]) + HiveFilter(condition=[AND(>($18, 10000), IS NOT NULL($16), IS NOT NULL($2))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], CASE=[CASE(IS NOT NULL($18), $18, 0)], CASE4=[CASE(IS NOT NULL($29), $29, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0), IS NOT NULL($17), IS NOT NULL($15))]) + HiveFilter(condition=[AND(>($18, 0), >($33, 1), >($29, 0), IS NOT NULL($0), IS NOT NULL($17), IS NOT NULL($15))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) @@ -315,11 +315,11 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveAggregate(group=[{5}], agg#0=[sum($2)], agg#1=[sum($7)], agg#2=[sum($3)], agg#3=[sum($8)]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], CASE=[CASE(IS NOT NULL($10), $10, 0)], CASE3=[CASE(IS NOT NULL($11), $11, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($11, 10000:DECIMAL(5, 0)), IS NOT NULL($9), IS NOT NULL($2))]) + HiveFilter(condition=[AND(>($11, 10000), IS NOT NULL($9), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_returns]], table:alias=[sr]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], CASE=[CASE(IS NOT NULL($10), $10, 0)], CASE4=[CASE(IS NOT NULL($20), $20, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($22, 1:DECIMAL(1, 0)), >($20, 0:DECIMAL(1, 0)), >($10, 0), IS NOT NULL($0), IS NOT NULL($9), IS NOT NULL($2))]) + HiveFilter(condition=[AND(>($10, 0), >($22, 1), >($20, 0), IS NOT NULL($0), IS NOT NULL($9), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out index a440eb2eed..6e2aecff55 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out @@ -65,7 +65,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$2], sort1=[$1], sort2=[$0], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject((tok_table_or_col i_manufact_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$1], avg_window_0=[$2]) - HiveFilter(condition=[CASE(>($2, 0:DECIMAL(1, 0)), >(/(ABS(-($1, $2)), $2), 0.1:DECIMAL(2, 1)), false)]) + HiveFilter(condition=[CASE(>($2, 0), >(/(ABS(-($1, $2)), $2), 0.1:DECIMAL(1, 1)), false)]) HiveProject((tok_table_or_col i_manufact_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$2], avg_window_0=[avg($2) OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST ROWS BETWEEN 2147483647 
FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(i_manufact_id=[$0], d_qoy=[$1], $f2=[$2]) HiveAggregate(group=[{6, 8}], agg#0=[sum($4)]) @@ -79,7 +79,7 @@ HiveSortLimit(sort0=[$2], sort1=[$1], sort2=[$0], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'portable':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'reference':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'self-help':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'accessories':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'classical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'fragrances':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'scholaramalgamalg #14':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #7':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiunivamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'amalgimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'edu packscholar #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'importoamalg #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($12, _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Children':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Women':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Music':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), OR(AND(IN($12, _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Children':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'personal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'portable':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'reference':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'self-help':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'scholaramalgamalg #14':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #7':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiunivamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(IN($12, _UTF-16LE'Women':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Music':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'accessories':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'classical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'fragrances':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'amalgimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'edu packscholar #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'importoamalg #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0], d_qoy=[$10]) HiveFilter(condition=[AND(IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out index 8a3780b20d..54f4843d37 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out @@ -161,12 +161,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0]) HiveAggregate(group=[{1}]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace'), IS NOT NULL($1))]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chiffon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'lace':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($1))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -8:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -8), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) @@ -184,12 +184,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0]) HiveAggregate(group=[{1}]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace'), IS NOT NULL($1))]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chiffon':VARCHAR(2147483647) 
CHARACTER SET "UTF-16LE", _UTF-16LE'lace':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($1))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -8:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -8), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) @@ -207,12 +207,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0]) HiveAggregate(group=[{1}]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace'), IS NOT NULL($1))]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'orchid':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chiffon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'lace':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($1))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -8:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -8), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out index f0dd0f2f37..15041169b8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out @@ -149,7 +149,7 @@ HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$3], (tok_table_or_col d_moy)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[$6], rank_window_1=[$7]) - HiveFilter(condition=[AND(=($3, 2000), >($6, 0:DECIMAL(1, 0)), CASE(>($6, 0:DECIMAL(1, 0)), >(/(ABS(-($5, $6)), $6), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($7))]) + HiveFilter(condition=[AND(=($3, 2000), >($6, 0), CASE(>($6, 0), >(/(ABS(-($5, $6)), $6), 0.1:DECIMAL(1, 1)), false), IS NOT NULL($7))]) HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[avg($5) OVER (PARTITION BY $1, $0, $4, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 
NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) HiveAggregate(group=[{1, 2, 8, 9, 11}], agg#0=[sum($6)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out index 52e9e79860..935d13062e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out @@ -144,7 +144,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(item_id=[$0], ss_item_rev=[$5], ss_dev=[*(/(/($5, +(+($5, $1), $9)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($5, $1), $9)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$9], ws_dev=[*(/(/($9, +(+($5, $1), $9)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($5, $1), $9), 3:DECIMAL(10, 0))]) HiveJoin(condition=[AND(=($0, $8), BETWEEN(false, $5, $10, $11), BETWEEN(false, $1, $10, $11), BETWEEN(false, $9, $6, $7), BETWEEN(false, $9, $2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($4, $0), BETWEEN(false, $5, $2, $3), BETWEEN(false, $1, $6, $7))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], *=[*(0.9:DECIMAL(2, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) + HiveProject($f0=[$0], $f1=[$1], *=[*(0.9:DECIMAL(1, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -175,7 +175,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f1=[$1], *=[*(0.9:DECIMAL(2, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) + HiveProject($f0=[$0], $f1=[$1], *=[*(0.9:DECIMAL(1, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -206,7 +206,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f1=[$1], *=[*(0.9:DECIMAL(2, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) + HiveProject($f0=[$0], $f1=[$1], *=[*(0.9:DECIMAL(1, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out index 8802220d23..986c28ee0a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out @@ -186,7 +186,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) @@ -209,7 +209,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) @@ -232,7 +232,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -6), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out index eec48472d5..921432c8e9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out @@ -112,7 +112,7 @@ HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CA HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -7:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -7), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_promo_sk=[$4], ss_ext_sales_price=[$5], d_date_sk=[$6], i_item_sk=[$7], s_store_sk=[$8], p_promo_sk=[$9]) HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -129,7 +129,7 @@ HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CA HiveFilter(condition=[AND(=($12, 
_UTF-16LE'Electronics'), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(=($27, -7:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($27, -7), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(p_promo_sk=[$0]) HiveFilter(condition=[AND(OR(=($8, _UTF-16LE'Y'), =($9, _UTF-16LE'Y'), =($11, _UTF-16LE'Y')), IS NOT NULL($0))]) @@ -142,7 +142,7 @@ HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CA HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -7:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -7), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], s_store_sk=[$7]) HiveJoin(condition=[=($3, $7)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -158,6 +158,6 @@ HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CA HiveFilter(condition=[AND(=($12, _UTF-16LE'Electronics'), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(=($27, -7:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($27, -7), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out index 35351157fb..ca3fe8037d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out @@ -67,7 +67,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject((tok_table_or_col i_manager_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$1], avg_window_0=[$2]) - HiveFilter(condition=[CASE(>($2, 0:DECIMAL(1, 0)), >(/(ABS(-($1, $2)), $2), 0.1:DECIMAL(2, 1)), false)]) + HiveFilter(condition=[CASE(>($2, 0), >(/(ABS(-($1, $2)), $2), 0.1:DECIMAL(1, 1)), false)]) HiveProject((tok_table_or_col i_manager_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$2], avg_window_0=[avg($2) OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(i_manager_id=[$0], d_moy=[$1], $f2=[$2]) HiveAggregate(group=[{6, 8}], agg#0=[sum($4)]) @@ -81,7 +81,7 @@ HiveSortLimit(sort0=[$0], sort1=[$2], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_manager_id=[$20]) - HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', 
_UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'portable':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'refernece':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'self-help':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'accessories':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'classical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'fragrances':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'scholaramalgamalg #14':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #7':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiunivamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'amalgimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'edu packscholar #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'importoamalg #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($12, _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Children':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Women':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Music':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), OR(AND(IN($12, _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Children':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'personal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'portable':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'refernece':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'self-help':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'scholaramalgamalg #14':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #7':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiunivamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(IN($12, _UTF-16LE'Women':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Music':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'accessories':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'classical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'fragrances':VARCHAR(2147483647) 
CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'amalgimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'edu packscholar #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'importoamalg #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0], d_moy=[$8]) HiveFilter(condition=[AND(IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out index 35b52752a1..2ce1b12dfa 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out @@ -333,7 +333,7 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'burnished':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'dim':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'steel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'navajo':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chocolate':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) @@ -417,7 +417,7 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'burnished':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'dim':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'steel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'navajo':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chocolate':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) diff --git 
a/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out index ef48ec3ccf..c376328a3e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out @@ -87,7 +87,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], *=[*(0.1:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(21, 6))]) + HiveProject($f0=[$0], *=[*(0.1:DECIMAL(1, 1), CAST(/($1, $2)):DECIMAL(21, 6))]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) HiveProject(ss_item_sk=[$0], ss_store_sk=[$1], $f2=[$2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out index 3255a9fb95..2f3ce4a1dc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out @@ -481,7 +481,7 @@ HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(sm_ship_mode_sk=[$0]) - HiveFilter(condition=[AND(IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($4, _UTF-16LE'DIAMOND':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'AIRBORNE':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29]) HiveAggregate(group=[{0, 1, 2, 3, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)], agg#3=[sum($9)], agg#4=[sum($10)], agg#5=[sum($11)], agg#6=[sum($12)], agg#7=[sum($13)], agg#8=[sum($14)], agg#9=[sum($15)], agg#10=[sum($16)], agg#11=[sum($17)], agg#12=[sum($18)], agg#13=[sum($19)], agg#14=[sum($20)], agg#15=[sum($21)], agg#16=[sum($22)], agg#17=[sum($23)], agg#18=[sum($24)], agg#19=[sum($25)], agg#20=[sum($26)], agg#21=[sum($27)], agg#22=[sum($28)], agg#23=[sum($29)]) @@ -500,7 +500,7 @@ HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(sm_ship_mode_sk=[$0]) - HiveFilter(condition=[AND(IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($4, _UTF-16LE'DIAMOND':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'AIRBORNE':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) HiveFilter(condition=[IS NOT NULL($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out index 6032192938..887a6d6ad5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out @@ -121,7 +121,7 @@ HiveSortLimit(sort0=[$0], sort1=[$4], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 1998, 1999, 2000), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Wildwood':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[AND(OR(=($3, 2), =($4, 1)), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out index 4e617f99ed..e114e341fd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out @@ -122,7 +122,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], dir0=[ HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer]], table:alias=[c]) HiveProject(ca_address_sk=[$0], ca_state=[$8]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'CO', _UTF-16LE'IL', _UTF-16LE'MN'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'CO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5]) HiveFilter(condition=[IS NOT NULL($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out index e1c8bc4a6a..5ed9c4f95f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out @@ -95,7 +95,7 @@ HiveProject(brand_id=[$0], brand=[$1], t_hour=[$2], t_minute=[$3], ext_price=[$4 HiveAggregate(group=[{1, 2, 7, 8}], agg#0=[sum($3)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(t_time_sk=[$0], t_hour=[$3], t_minute=[$4]) - HiveFilter(condition=[AND(IN($9, _UTF-16LE'breakfast', _UTF-16LE'dinner'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($9, _UTF-16LE'breakfast':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'dinner':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out index 671959d018..b3db960c46 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out @@ -85,9 +85,9 @@ HiveSortLimit(sort0=[$5], dir0=[DESC-nulls-last]) HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 2000, 2001, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(>($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1), false), IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(>($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1), false), IN($2, _UTF-16LE'>10000':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'unknown':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($23, _UTF-16LE'Mobile County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Maverick County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Kittitas County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out index 6b180bd064..047e21d943 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out @@ -161,7 +161,7 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f4=[$3]) - HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($3, 0)]) HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) @@ -175,7 +175,7 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) - HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($3, 0)]) HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out index 73b94521e5..2737db0217 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out @@ -158,7 +158,7 @@ CBO PLAN: HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$5], sales_cnt_diff=[$6], 
sales_amt_diff=[$7]) HiveSortLimit(sort0=[$6], dir0=[ASC], fetch=[100]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$10], sales_cnt_diff=[-($10, $4)], sales_amt_diff=[-($11, $5)]) - HiveJoin(condition=[AND(=($6, $0), =($7, $1), =($8, $2), =($9, $3), <(/(CAST($10):DECIMAL(17, 2), CAST($4):DECIMAL(17, 2)), 0.9:DECIMAL(2, 1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($6, $0), =($7, $1), =($8, $2), =($9, $3), <(/(CAST($10):DECIMAL(17, 2), CAST($4):DECIMAL(17, 2)), 0.9:DECIMAL(1, 1)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query8.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query8.q.out index 3176ddded5..621f32c008 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query8.q.out @@ -246,7 +246,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(ca_zip=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject(ca_zip=[substr($9, 1, 5)]) - HiveFilter(condition=[AND(IN(substr($9, 1, 5), _UTF-16LE'89436', _UTF-16LE'30868', _UTF-16LE'65085', _UTF-16LE'22977', _UTF-16LE'83927', _UTF-16LE'77557', _UTF-16LE'58429', _UTF-16LE'40697', _UTF-16LE'80614', _UTF-16LE'10502', _UTF-16LE'32779', _UTF-16LE'91137', _UTF-16LE'61265', _UTF-16LE'98294', _UTF-16LE'17921', _UTF-16LE'18427', _UTF-16LE'21203', _UTF-16LE'59362', _UTF-16LE'87291', _UTF-16LE'84093', _UTF-16LE'21505', _UTF-16LE'17184', _UTF-16LE'10866', _UTF-16LE'67898', _UTF-16LE'25797', _UTF-16LE'28055', _UTF-16LE'18377', _UTF-16LE'80332', _UTF-16LE'74535', _UTF-16LE'21757', _UTF-16LE'29742', _UTF-16LE'90885', _UTF-16LE'29898', _UTF-16LE'17819', _UTF-16LE'40811', _UTF-16LE'25990', _UTF-16LE'47513', _UTF-16LE'89531', _UTF-16LE'91068', _UTF-16LE'10391', _UTF-16LE'18846', _UTF-16LE'99223', _UTF-16LE'82637', _UTF-16LE'41368', _UTF-16LE'83658', _UTF-16LE'86199', _UTF-16LE'81625', _UTF-16LE'26696', _UTF-16LE'89338', _UTF-16LE'88425', _UTF-16LE'32200', _UTF-16LE'81427', _UTF-16LE'19053', _UTF-16LE'77471', _UTF-16LE'36610', _UTF-16LE'99823', _UTF-16LE'43276', _UTF-16LE'41249', _UTF-16LE'48584', _UTF-16LE'83550', _UTF-16LE'82276', _UTF-16LE'18842', _UTF-16LE'78890', _UTF-16LE'14090', _UTF-16LE'38123', _UTF-16LE'40936', _UTF-16LE'34425', _UTF-16LE'19850', _UTF-16LE'43286', _UTF-16LE'80072', _UTF-16LE'79188', _UTF-16LE'54191', _UTF-16LE'11395', _UTF-16LE'50497', _UTF-16LE'84861', _UTF-16LE'90733', _UTF-16LE'21068', _UTF-16LE'57666', _UTF-16LE'37119', _UTF-16LE'25004', _UTF-16LE'57835', _UTF-16LE'70067', _UTF-16LE'62878', _UTF-16LE'95806', _UTF-16LE'19303', _UTF-16LE'18840', _UTF-16LE'19124', _UTF-16LE'29785', _UTF-16LE'16737', _UTF-16LE'16022', _UTF-16LE'49613', _UTF-16LE'89977', _UTF-16LE'68310', _UTF-16LE'60069', _UTF-16LE'98360', _UTF-16LE'48649', _UTF-16LE'39050', _UTF-16LE'41793', _UTF-16LE'25002', _UTF-16LE'27413', _UTF-16LE'39736', _UTF-16LE'47208', _UTF-16LE'16515', _UTF-16LE'94808', _UTF-16LE'57648', _UTF-16LE'15009', _UTF-16LE'80015', _UTF-16LE'42961', _UTF-16LE'63982', _UTF-16LE'21744', _UTF-16LE'71853', _UTF-16LE'81087', _UTF-16LE'67468', 
_UTF-16LE'34175', _UTF-16LE'64008', _UTF-16LE'20261', _UTF-16LE'11201', _UTF-16LE'51799', _UTF-16LE'48043', _UTF-16LE'45645', _UTF-16LE'61163', _UTF-16LE'48375', _UTF-16LE'36447', _UTF-16LE'57042', _UTF-16LE'21218', _UTF-16LE'41100', _UTF-16LE'89951', _UTF-16LE'22745', _UTF-16LE'35851', _UTF-16LE'83326', _UTF-16LE'61125', _UTF-16LE'78298', _UTF-16LE'80752', _UTF-16LE'49858', _UTF-16LE'52940', _UTF-16LE'96976', _UTF-16LE'63792', _UTF-16LE'11376', _UTF-16LE'53582', _UTF-16LE'18717', _UTF-16LE'90226', _UTF-16LE'50530', _UTF-16LE'94203', _UTF-16LE'99447', _UTF-16LE'27670', _UTF-16LE'96577', _UTF-16LE'57856', _UTF-16LE'56372', _UTF-16LE'16165', _UTF-16LE'23427', _UTF-16LE'54561', _UTF-16LE'28806', _UTF-16LE'44439', _UTF-16LE'22926', _UTF-16LE'30123', _UTF-16LE'61451', _UTF-16LE'92397', _UTF-16LE'56979', _UTF-16LE'92309', _UTF-16LE'70873', _UTF-16LE'13355', _UTF-16LE'21801', _UTF-16LE'46346', _UTF-16LE'37562', _UTF-16LE'56458', _UTF-16LE'28286', _UTF-16LE'47306', _UTF-16LE'99555', _UTF-16LE'69399', _UTF-16LE'26234', _UTF-16LE'47546', _UTF-16LE'49661', _UTF-16LE'88601', _UTF-16LE'35943', _UTF-16LE'39936', _UTF-16LE'25632', _UTF-16LE'24611', _UTF-16LE'44166', _UTF-16LE'56648', _UTF-16LE'30379', _UTF-16LE'59785', _UTF-16LE'11110', _UTF-16LE'14329', _UTF-16LE'93815', _UTF-16LE'52226', _UTF-16LE'71381', _UTF-16LE'13842', _UTF-16LE'25612', _UTF-16LE'63294', _UTF-16LE'14664', _UTF-16LE'21077', _UTF-16LE'82626', _UTF-16LE'18799', _UTF-16LE'60915', _UTF-16LE'81020', _UTF-16LE'56447', _UTF-16LE'76619', _UTF-16LE'11433', _UTF-16LE'13414', _UTF-16LE'42548', _UTF-16LE'92713', _UTF-16LE'70467', _UTF-16LE'30884', _UTF-16LE'47484', _UTF-16LE'16072', _UTF-16LE'38936', _UTF-16LE'13036', _UTF-16LE'88376', _UTF-16LE'45539', _UTF-16LE'35901', _UTF-16LE'19506', _UTF-16LE'65690', _UTF-16LE'73957', _UTF-16LE'71850', _UTF-16LE'49231', _UTF-16LE'14276', _UTF-16LE'20005', _UTF-16LE'18384', _UTF-16LE'76615', _UTF-16LE'11635', _UTF-16LE'38177', _UTF-16LE'55607', _UTF-16LE'41369', _UTF-16LE'95447', _UTF-16LE'58581', _UTF-16LE'58149', _UTF-16LE'91946', _UTF-16LE'33790', _UTF-16LE'76232', _UTF-16LE'75692', _UTF-16LE'95464', _UTF-16LE'22246', _UTF-16LE'51061', _UTF-16LE'56692', _UTF-16LE'53121', _UTF-16LE'77209', _UTF-16LE'15482', _UTF-16LE'10688', _UTF-16LE'14868', _UTF-16LE'45907', _UTF-16LE'73520', _UTF-16LE'72666', _UTF-16LE'25734', _UTF-16LE'17959', _UTF-16LE'24677', _UTF-16LE'66446', _UTF-16LE'94627', _UTF-16LE'53535', _UTF-16LE'15560', _UTF-16LE'41967', _UTF-16LE'69297', _UTF-16LE'11929', _UTF-16LE'59403', _UTF-16LE'33283', _UTF-16LE'52232', _UTF-16LE'57350', _UTF-16LE'43933', _UTF-16LE'40921', _UTF-16LE'36635', _UTF-16LE'10827', _UTF-16LE'71286', _UTF-16LE'19736', _UTF-16LE'80619', _UTF-16LE'25251', _UTF-16LE'95042', _UTF-16LE'15526', _UTF-16LE'36496', _UTF-16LE'55854', _UTF-16LE'49124', _UTF-16LE'81980', _UTF-16LE'35375', _UTF-16LE'49157', _UTF-16LE'63512', _UTF-16LE'28944', _UTF-16LE'14946', _UTF-16LE'36503', _UTF-16LE'54010', _UTF-16LE'18767', _UTF-16LE'23969', _UTF-16LE'43905', _UTF-16LE'66979', _UTF-16LE'33113', _UTF-16LE'21286', _UTF-16LE'58471', _UTF-16LE'59080', _UTF-16LE'13395', _UTF-16LE'79144', _UTF-16LE'70373', _UTF-16LE'67031', _UTF-16LE'38360', _UTF-16LE'26705', _UTF-16LE'50906', _UTF-16LE'52406', _UTF-16LE'26066', _UTF-16LE'73146', _UTF-16LE'15884', _UTF-16LE'31897', _UTF-16LE'30045', _UTF-16LE'61068', _UTF-16LE'45550', _UTF-16LE'92454', _UTF-16LE'13376', _UTF-16LE'14354', _UTF-16LE'19770', _UTF-16LE'22928', _UTF-16LE'97790', _UTF-16LE'50723', _UTF-16LE'46081', _UTF-16LE'30202', _UTF-16LE'14410', 
_UTF-16LE'20223', _UTF-16LE'88500', _UTF-16LE'67298', _UTF-16LE'13261', _UTF-16LE'14172', _UTF-16LE'81410', _UTF-16LE'93578', _UTF-16LE'83583', _UTF-16LE'46047', _UTF-16LE'94167', _UTF-16LE'82564', _UTF-16LE'21156', _UTF-16LE'15799', _UTF-16LE'86709', _UTF-16LE'37931', _UTF-16LE'74703', _UTF-16LE'83103', _UTF-16LE'23054', _UTF-16LE'70470', _UTF-16LE'72008', _UTF-16LE'49247', _UTF-16LE'91911', _UTF-16LE'69998', _UTF-16LE'20961', _UTF-16LE'70070', _UTF-16LE'63197', _UTF-16LE'54853', _UTF-16LE'88191', _UTF-16LE'91830', _UTF-16LE'49521', _UTF-16LE'19454', _UTF-16LE'81450', _UTF-16LE'89091', _UTF-16LE'62378', _UTF-16LE'25683', _UTF-16LE'61869', _UTF-16LE'51744', _UTF-16LE'36580', _UTF-16LE'85778', _UTF-16LE'36871', _UTF-16LE'48121', _UTF-16LE'28810', _UTF-16LE'83712', _UTF-16LE'45486', _UTF-16LE'67393', _UTF-16LE'26935', _UTF-16LE'42393', _UTF-16LE'20132', _UTF-16LE'55349', _UTF-16LE'86057', _UTF-16LE'21309', _UTF-16LE'80218', _UTF-16LE'10094', _UTF-16LE'11357', _UTF-16LE'48819', _UTF-16LE'39734', _UTF-16LE'40758', _UTF-16LE'30432', _UTF-16LE'21204', _UTF-16LE'29467', _UTF-16LE'30214', _UTF-16LE'61024', _UTF-16LE'55307', _UTF-16LE'74621', _UTF-16LE'11622', _UTF-16LE'68908', _UTF-16LE'33032', _UTF-16LE'52868', _UTF-16LE'99194', _UTF-16LE'99900', _UTF-16LE'84936', _UTF-16LE'69036', _UTF-16LE'99149', _UTF-16LE'45013', _UTF-16LE'32895', _UTF-16LE'59004', _UTF-16LE'32322', _UTF-16LE'14933', _UTF-16LE'32936', _UTF-16LE'33562', _UTF-16LE'72550', _UTF-16LE'27385', _UTF-16LE'58049', _UTF-16LE'58200', _UTF-16LE'16808', _UTF-16LE'21360', _UTF-16LE'32961', _UTF-16LE'18586', _UTF-16LE'79307', _UTF-16LE'15492'), IS NOT NULL(substr(substr($9, 1, 5), 1, 2)))]) + HiveFilter(condition=[AND(IN(substr($9, 1, 5), _UTF-16LE'89436':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30868':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'65085':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22977':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83927':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'77557':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58429':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40697':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80614':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10502':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32779':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91137':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61265':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'98294':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'17921':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18427':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21203':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59362':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'87291':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'84093':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21505':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'17184':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10866':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67898':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25797':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28055':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18377':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80332':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'74535':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'21757':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'29742':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'90885':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'29898':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'17819':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40811':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25990':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47513':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89531':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91068':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10391':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18846':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99223':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'82637':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41368':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83658':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86199':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81625':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26696':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89338':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88425':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32200':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81427':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19053':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'77471':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36610':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99823':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'43276':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41249':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48584':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83550':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'82276':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18842':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'78890':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14090':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'38123':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'34425':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19850':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'43286':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80072':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'79188':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'54191':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11395':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'50497':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'84861':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'90733':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21068':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57666':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'37119':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25004':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57835':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70067':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'62878':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'95806':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19303':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'18840':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19124':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'29785':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16737':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16022':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49613':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89977':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'68310':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'60069':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'98360':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48649':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'39050':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41793':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25002':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'27413':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'39736':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47208':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16515':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'94808':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57648':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15009':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80015':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'42961':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63982':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21744':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'71853':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81087':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67468':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'34175':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'64008':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20261':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11201':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'51799':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48043':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45645':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61163':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48375':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36447':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57042':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21218':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41100':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89951':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22745':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'35851':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83326':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61125':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'78298':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80752':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49858':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52940':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'96976':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63792':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11376':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'53582':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18717':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'90226':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'50530':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'94203':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99447':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'27670':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'96577':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57856':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56372':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16165':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'23427':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'54561':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28806':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'44439':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22926':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30123':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61451':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'92397':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56979':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'92309':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70873':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13355':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21801':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'46346':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'37562':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56458':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28286':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47306':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99555':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'69399':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26234':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47546':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49661':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88601':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'35943':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'39936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25632':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'24611':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'44166':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56648':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30379':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59785':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11110':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14329':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'93815':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52226':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'71381':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13842':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25612':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63294':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14664':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21077':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'82626':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18799':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'60915':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81020':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56447':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'76619':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'11433':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13414':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'42548':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'92713':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70467':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30884':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47484':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16072':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'38936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13036':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88376':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45539':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'35901':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19506':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'65690':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'73957':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'71850':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49231':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14276':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20005':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18384':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'76615':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11635':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'38177':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'55607':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41369':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'95447':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58581':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58149':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91946':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33790':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'76232':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'75692':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'95464':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22246':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'51061':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56692':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'53121':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'77209':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15482':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10688':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14868':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45907':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'73520':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'72666':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25734':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'17959':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'24677':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'66446':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'94627':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'53535':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15560':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41967':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'69297':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11929':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59403':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'33283':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52232':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57350':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'43933':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40921':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36635':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10827':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'71286':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19736':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80619':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25251':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'95042':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15526':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36496':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'55854':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49124':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81980':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'35375':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49157':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63512':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28944':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14946':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36503':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'54010':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18767':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'23969':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'43905':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'66979':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33113':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21286':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58471':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59080':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13395':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'79144':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70373':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67031':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'38360':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26705':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'50906':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52406':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26066':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'73146':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15884':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'31897':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30045':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61068':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45550':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'92454':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13376':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14354':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19770':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22928':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'97790':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'50723':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'46081':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30202':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'14410':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20223':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88500':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67298':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13261':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14172':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81410':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'93578':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83583':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'46047':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'94167':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'82564':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21156':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15799':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86709':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'37931':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'74703':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83103':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'23054':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70470':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'72008':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49247':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91911':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'69998':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20961':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70070':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63197':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'54853':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88191':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91830':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49521':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19454':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81450':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89091':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'62378':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25683':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61869':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'51744':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36580':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85778':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36871':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48121':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28810':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83712':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45486':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67393':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26935':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'42393':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20132':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'55349':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86057':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21309':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80218':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10094':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11357':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48819':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'39734':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40758':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30432':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21204':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'29467':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30214':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61024':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'55307':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'74621':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11622':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'68908':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33032':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52868':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99194':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99900':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'84936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'69036':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99149':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45013':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32895':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59004':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32322':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14933':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33562':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'72550':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'27385':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58049':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58200':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16808':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21360':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32961':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18586':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'79307':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15492':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL(substr(substr($9, 1, 5), 1, 2)))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(ca_zip=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out index 634fc836b8..5d2ab0559e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out @@ -230,7 +230,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(>($5, 50), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) @@ -256,7 +256,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($4, $13)], 
joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(>($5, 50), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($2, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) @@ -282,7 +282,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(>($5, 50), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out index 474193229a..37d88da38e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out @@ -169,7 +169,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-10-15':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-11-10':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f1=[$1], CAST=[CAST($1):DOUBLE]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) @@ -193,7 +193,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-10-15':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-11-10':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f1=[$1], CAST=[CAST($1):DOUBLE]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) @@ -217,6 +217,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + 
HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-10-15':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-11-10':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out index c8f6ded703..1c996ac6a9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -198,18 +198,18 @@ HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveJoin(condition=[=($24, $13)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $22), =($2, $23), =($0, $10))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'D':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($3, _UTF-16LE'4 yr Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Primary':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) HiveJoin(condition=[=($12, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($8), IS NOT NULL($4), IS NOT NULL($12), IS NOT NULL($2), IS NOT NULL($13))]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN2=[IN($8, _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN3=[IN($8, _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) + 
HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'D':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($3, _UTF-16LE'4 yr Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Primary':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query89.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query89.q.out index 62aa4bb5c9..667c16e96b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query89.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query89.q.out @@ -66,7 +66,7 @@ CBO PLAN: HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7]) HiveSortLimit(sort0=[$8], sort1=[$3], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($6, $7)]) - HiveFilter(condition=[CASE(<>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(2, 1)), false)]) + HiveFilter(condition=[CASE(<>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(1, 1)), false)]) HiveProject((tok_table_or_col i_category)=[$2], (tok_table_or_col i_class)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $2, $0, $4, $5 ORDER BY $2 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(i_brand=[$0], i_class=[$1], i_category=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) HiveAggregate(group=[{5, 6, 7, 9, 11, 12}], agg#0=[sum($3)]) @@ -77,7 +77,7 @@ HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_co HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12]) - 
HiveFilter(condition=[AND(IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical', _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'), IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics', _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical')), AND(IN($12, _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), IN($10, _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'))), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'wallpaper':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'parenting':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'musical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'womens':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'birdal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($12, _UTF-16LE'Home':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Shoes':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), OR(AND(IN($12, _UTF-16LE'Home':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'wallpaper':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'parenting':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'musical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(IN($12, _UTF-16LE'Shoes':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'womens':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'birdal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0], d_moy=[$8]) HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out index 3a2aacfff0..6cef63ba0e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out @@ -83,14 +83,14 @@ HiveProject(call_center=[$0], call_center_name=[$1], manager=[$2], returns_loss= HiveJoin(condition=[=($9, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($11, -7:DECIMAL(1, 0)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($11, -7), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($4))]) 
HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'W':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($3, _UTF-16LE'Unknown':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN(ROW($2, $3), ROW(_UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Unknown':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), ROW(_UTF-16LE'W':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_call_center_sk=[$2], cr_net_loss=[$3], d_date_sk=[$4], cc_call_center_sk=[$5], cc_call_center_id=[$6], cc_name=[$7], cc_manager=[$8]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out index 8881f9e4db..e2a14a6e49 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out @@ -75,7 +75,7 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Sports':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out index 8633ba2e8e..b568921a5f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out @@ -144,7 +144,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5= HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[c]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) - HiveFilter(condition=[IN($7, _UTF-16LE'Walker County', _UTF-16LE'Richland County', _UTF-16LE'Gaines County', _UTF-16LE'Douglas County', _UTF-16LE'Dona Ana County')]) + HiveFilter(condition=[IN($7, _UTF-16LE'Walker County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'Richland County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Gaines County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Douglas County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Dona Ana County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out index 53d470da5c..127003c78b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out @@ -187,7 +187,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) @@ -200,7 +200,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[=($6, 2001)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f9=[$7]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out index cfb7945fb4..6737448fe7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out @@ -77,7 +77,7 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) + HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Sports':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], 
cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out index b87661b78b..8c291d5ce0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out @@ -118,11 +118,11 @@ HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[CAST(/($2, $3)):DECIMAL(11, 6)], HiveJoin(condition=[AND(=($13, $25), OR(AND($1, $2, $21, $26), AND($3, $4, $22, $27), AND($5, $6, $23, $27)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'D':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($3, _UTF-16LE'4 yr Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Primary':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $11), AND($2, $12), AND($3, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN2=[IN($8, _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN3=[IN($8, _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), =($10, _UTF-16LE'United States'))]) HiveTableScan(table=[[default, customer_address]], 
table:alias=[customer_address]) HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100:DECIMAL(12, 2), 200:DECIMAL(12, 2))], BETWEEN9=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 300:DECIMAL(12, 2))], BETWEEN10=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 250:DECIMAL(12, 2))], BETWEEN11=[BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0))], BETWEEN12=[BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0))], BETWEEN13=[BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query15.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query15.q.out index fa7ad1f2dd..5d051a7074 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query15.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query15.q.out @@ -55,11 +55,11 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0], ca_zip=[$9], IN=[IN(substr($9, 1, 5), _UTF-16LE'85669', _UTF-16LE'86197', _UTF-16LE'88274', _UTF-16LE'83405', _UTF-16LE'86475', _UTF-16LE'85392', _UTF-16LE'85460', _UTF-16LE'80348', _UTF-16LE'81792')], IN3=[IN($8, _UTF-16LE'CA', _UTF-16LE'WA', _UTF-16LE'GA')]) + HiveProject(ca_address_sk=[$0], ca_zip=[$9], IN=[IN(substr($9, 1, 5), _UTF-16LE'85669':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86197':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88274':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83405':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86475':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85392':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85460':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80348':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81792':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN3=[IN($8, _UTF-16LE'CA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_sales_price=[$2], >=[$3], d_date_sk=[$4]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_sales_price=[$21], >=[>($21, 500:DECIMAL(3, 0))]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_sales_price=[$21], >=[>($21, 500)]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(d_date_sk=[$0]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out index b40ca94ac6..376b60a273 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out @@ -89,7 +89,7 @@ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[su 
HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) - HiveFilter(condition=[IN($25, _UTF-16LE'Ziebach County', _UTF-16LE'Levy County', _UTF-16LE'Huron County', _UTF-16LE'Franklin Parish', _UTF-16LE'Daviess County')]) + HiveFilter(condition=[IN($25, _UTF-16LE'Ziebach County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Daviess County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) HiveProject(cs_warehouse_sk=[$14], cs_order_number=[$17]) HiveFilter(condition=[IS NOT NULL($14)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out index 795d15ebbd..698ee38291 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out @@ -113,7 +113,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) + HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'2000Q2':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'2000Q3':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ticket_number=[$4], ss_quantity=[$5], d_date_sk=[$6], sr_returned_date_sk=[$7], sr_item_sk=[$8], sr_customer_sk=[$9], sr_ticket_number=[$10], sr_return_quantity=[$11], d_date_sk0=[$12]) HiveJoin(condition=[AND(=($2, $9), =($1, $8), =($4, $10))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -130,7 +130,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) + HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'2000Q2':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'2000Q3':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(s_store_sk=[$0], s_state=[$24]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out index c5cd8b552d..e89d4c007b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out @@ -92,7 +92,7 @@ HiveSortLimit(sort0=[$1], sort1=[$2], 
sort2=[$3], sort3=[$0], dir0=[ASC], dir1=[ HiveFilter(condition=[AND(IN($12, 9, 5, 12, 4, 1, 10), IS NOT NULL($2), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8], ca_country=[$10]) - HiveFilter(condition=[IN($8, _UTF-16LE'ND', _UTF-16LE'WI', _UTF-16LE'AL', _UTF-16LE'NC', _UTF-16LE'OK', _UTF-16LE'MS', _UTF-16LE'TN')]) + HiveFilter(condition=[IN($8, _UTF-16LE'ND':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'AL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NC':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OK':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MS':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'TN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_bill_cdemo_sk=[$2], cs_item_sk=[$3], CAST=[$4], CAST5=[$5], CAST6=[$6], CAST7=[$7], CAST8=[$8], d_date_sk=[$9], cd_demo_sk=[$10], CAST0=[$11]) HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out index 05c9f5fefd..f82af064c4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out @@ -69,7 +69,7 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) + HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Sports':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out index fc95a2bde8..42e348266f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out @@ -83,6 +83,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(2, 2), 1.49:DECIMAL(3, 2))]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out index 896495235d..7af039c7ce 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out @@ -145,7 +145,7 @@ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(_o__c0=[*(0.05:DECIMAL(3, 2), CAST(/($0, $1)):DECIMAL(21, 6))]) + HiveProject(_o__c0=[*(0.05:DECIMAL(2, 2), CAST(/($0, $1)):DECIMAL(21, 6))]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(21, 6))]) HiveProject($f0=[$0], $f1=[$1]) HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out index 4ce4b8f143..caa2183530 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out @@ -75,6 +75,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[=($6, 2001)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0], s_state=[$24]) - HiveFilter(condition=[IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC')]) + HiveFilter(condition=[IN($24, _UTF-16LE'SD':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'FL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'LA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'SC':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out index ac1764a61b..76532bd335 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out @@ -112,10 +112,10 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($6, $1)], store_q1_q2_increase=[/($9, $11)], web_q2_q3_increase=[/($4, $6)], store_q2_q3_increase=[/($13, $9)]) - HiveJoin(condition=[AND(=($8, $0), CASE(>($11, 0:DECIMAL(1, 0)), CASE($2, >(/($6, $1), /($9, $11)), false), false), CASE(>($9, 0:DECIMAL(1, 0)), CASE($7, >(/($4, $6), /($13, $9)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($8, $0), CASE(>($11, 0), CASE($2, >(/($6, $1), /($9, $11)), false), false), CASE(>($9, 0), CASE($7, >(/($4, $6), /($13, $9)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0:DECIMAL(1, 0))]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) 
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) @@ -141,7 +141,7 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($10, 3))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0:DECIMAL(1, 0))]) + HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0)]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out index 673dc3da57..a02e165ccd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out @@ -180,7 +180,7 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -6)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) @@ -203,7 +203,7 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -6)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) @@ -226,7 +226,7 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -6)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out index c7520fa293..1243fcd57c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out @@ -90,9 +90,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ 
HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), OR(BETWEEN(false, $9, 1, 3), BETWEEN(false, $9, 25, 28)), OR(<=(1, $9), <=($9, 3), <=(25, $9), <=($9, 28)))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(>($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1.2), false), IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'))]) + HiveFilter(condition=[AND(>($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1.2), false), IN($2, _UTF-16LE'>10000':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'unknown':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County', _UTF-16LE'Fairfield County', _UTF-16LE'Jackson County', _UTF-16LE'Barrow County', _UTF-16LE'Pennington County')]) + HiveFilter(condition=[IN($23, _UTF-16LE'Mobile County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Maverick County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Kittitas County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Fairfield County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Jackson County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Barrow County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Pennington County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query36.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query36.q.out index 9c70e60952..358ace45ce 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query36.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query36.q.out @@ -83,7 +83,7 @@ HiveProject(gross_margin=[$0], i_category=[$1], i_class=[$2], lochierarchy=[$3], HiveFilter(condition=[=($6, 1999)]) HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC', _UTF-16LE'AL', _UTF-16LE'GA')]) + HiveFilter(condition=[IN($24, _UTF-16LE'SD':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'FL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'LA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'SC':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'AL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(i_item_sk=[$0], i_class=[$10], i_category=[$12]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out index bb4ed738c6..f0ec403984 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out @@ -271,7 +271,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], 
fetch=[100]) HiveJoin(condition=[=($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) @@ -284,7 +284,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[=($6, 2001)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f8=[$7]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) @@ -297,7 +297,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[=($6, 2001)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out index f71c0b57db..9f2d396e3f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out @@ -84,6 +84,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(2, 2), 1.49:DECIMAL(3, 2))]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out index 4eb819b1a1..65580497b6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out @@ -56,6 +56,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveFilter(condition=[=($6, 1998)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) - 
HiveFilter(condition=[=($27, -6:DECIMAL(1, 0))]) + HiveFilter(condition=[=($27, -6)]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out index d0a32f7aab..64c5ac5417 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out @@ -105,7 +105,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), IN($7, 6, 0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood', _UTF-16LE'Union', _UTF-16LE'Salem', _UTF-16LE'Highland Park')]) + HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Wildwood':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Union':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Salem':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Highland Park':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[OR(=($4, 1), =($3, 2))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out index e71fd8778a..94bfb658d0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out @@ -155,7 +155,7 @@ HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], su HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$4], (tok_table_or_col d_moy)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[$7], rank_window_1=[$8]) - HiveFilter(condition=[AND(=($4, 2000), >($7, 0:DECIMAL(1, 0)), CASE(>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($8))]) + HiveFilter(condition=[AND(=($4, 2000), >($7, 0), CASE(>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(1, 1)), false), IS NOT NULL($8))]) HiveProject((tok_table_or_col i_category)=[$5], (tok_table_or_col i_brand)=[$4], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$0], (tok_table_or_col d_moy)=[$1], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $5, $4, $2, $3, $0 ORDER BY $5 NULLS FIRST, $4 NULLS FIRST, $2 NULLS FIRST, $3 NULLS FIRST, $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $5, $4, $2, $3 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(d_year=[$0], d_moy=[$1], s_store_name=[$2], s_company_name=[$3], i_brand=[$4], i_category=[$5], $f6=[$6]) HiveAggregate(group=[{5, 6, 8, 9, 11, 12}], agg#0=[sum($3)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out index 4be0b33782..85f44dc586 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out @@ -143,8 +143,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveAggregate(group=[{}], agg#0=[sum($8)]) HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $9), AND($2, $10), AND($3, $11)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN2=[IN($8, _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN3=[IN($8, _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), =($10, _UTF-16LE'United States'))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cd_demo_sk=[$0]) @@ -152,7 +152,7 @@ HiveAggregate(group=[{}], agg#0=[sum($8)]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0:DECIMAL(12, 2), 2000:DECIMAL(12, 2))], BETWEEN6=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 3000:DECIMAL(12, 2))], BETWEEN7=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 25000:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0)), BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0)), BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))), IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(0:DECIMAL(12, 2), $22), <=($22, 2000:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 3000:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 25000:DECIMAL(12, 2))))]) + 
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($7), OR(BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0)), BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0)), BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(0:DECIMAL(12, 2), $22), <=($22, 2000:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 3000:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 25000:DECIMAL(12, 2))))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 1998)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out index b499ae96cf..7e5e8fa952 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out @@ -284,11 +284,11 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject($f0=[$5], $f1=[CASE(IS NOT NULL($2), $2, 0)], $f2=[CASE(IS NOT NULL($7), $7, 0)], $f3=[CASE(IS NOT NULL($3), $3, 0:DECIMAL(12, 2))], $f4=[CASE(IS NOT NULL($8), $8, 0:DECIMAL(12, 2))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) - HiveFilter(condition=[>($15, 10000:DECIMAL(5, 0))]) + HiveFilter(condition=[>($15, 10000)]) HiveTableScan(table=[[default, web_returns]], table:alias=[wr]) HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_net_paid=[$29], ws_net_profit=[$33]) - HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0))]) + HiveFilter(condition=[AND(>($18, 0), >($33, 1), >($29, 0), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) @@ -301,11 +301,11 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject($f0=[$5], $f1=[CASE(IS NOT NULL($2), $2, 0)], $f2=[CASE(IS NOT NULL($7), $7, 0)], $f3=[CASE(IS NOT NULL($3), $3, 0:DECIMAL(12, 2))], $f4=[CASE(IS NOT NULL($8), $8, 0:DECIMAL(12, 2))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) - HiveFilter(condition=[>($18, 10000:DECIMAL(5, 0))]) + HiveFilter(condition=[>($18, 10000)]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr]) HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_net_paid=[$29], cs_net_profit=[$33]) - HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0))]) + HiveFilter(condition=[AND(>($18, 0), >($33, 1), >($29, 0), IS NOT NULL($0))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), 
=($8, 12))]) @@ -318,11 +318,11 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject($f0=[$5], $f1=[CASE(IS NOT NULL($2), $2, 0)], $f2=[CASE(IS NOT NULL($7), $7, 0)], $f3=[CASE(IS NOT NULL($3), $3, 0:DECIMAL(12, 2))], $f4=[CASE(IS NOT NULL($8), $8, 0:DECIMAL(12, 2))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) - HiveFilter(condition=[>($11, 10000:DECIMAL(5, 0))]) + HiveFilter(condition=[>($11, 10000)]) HiveTableScan(table=[[default, store_returns]], table:alias=[sr]) HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_net_paid=[$20], ss_net_profit=[$22]) - HiveFilter(condition=[AND(>($22, 1:DECIMAL(1, 0)), >($20, 0:DECIMAL(1, 0)), >($10, 0), IS NOT NULL($0))]) + HiveFilter(condition=[AND(>($10, 0), >($22, 1), >($20, 0), IS NOT NULL($0))]) HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query53.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query53.q.out index e79c81064e..f8993fdf8a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query53.q.out @@ -65,7 +65,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$2], sort1=[$1], sort2=[$0], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject((tok_table_or_col i_manufact_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$1], avg_window_0=[$2]) - HiveFilter(condition=[CASE(>($2, 0:DECIMAL(1, 0)), >(/(ABS(-($1, $2)), $2), 0.1:DECIMAL(2, 1)), false)]) + HiveFilter(condition=[CASE(>($2, 0), >(/(ABS(-($1, $2)), $2), 0.1:DECIMAL(1, 1)), false)]) HiveProject((tok_table_or_col i_manufact_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$2], avg_window_0=[avg($2) OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(i_manufact_id=[$0], d_qoy=[$1], $f2=[$2]) HiveAggregate(group=[{4, 6}], agg#0=[sum($2)]) @@ -75,7 +75,7 @@ HiveSortLimit(sort0=[$2], sort1=[$1], sort2=[$0], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', 
_UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))))]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'portable':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'reference':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'self-help':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'accessories':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'classical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'fragrances':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'scholaramalgamalg #14':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #7':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiunivamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'amalgimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'edu packscholar #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'importoamalg #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($12, _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Children':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Women':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Music':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), OR(AND(IN($12, _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Children':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'personal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'portable':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'reference':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'self-help':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'scholaramalgamalg #14':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #7':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiunivamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(IN($12, _UTF-16LE'Women':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Music':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'accessories':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'classical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'fragrances':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'amalgimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'edu packscholar #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'importoamalg #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))))]) 
HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0], d_qoy=[$10]) HiveFilter(condition=[IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out index e94e9988b1..5795b10e3d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out @@ -160,12 +160,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0]) HiveAggregate(group=[{1}]) - HiveFilter(condition=[IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace')]) + HiveFilter(condition=[IN($17, _UTF-16LE'orchid':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chiffon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'lace':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -8)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) @@ -182,12 +182,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0]) HiveAggregate(group=[{1}]) - HiveFilter(condition=[IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace')]) + HiveFilter(condition=[IN($17, _UTF-16LE'orchid':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chiffon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'lace':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -8)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) @@ -204,12 +204,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0]) HiveAggregate(group=[{1}]) - HiveFilter(condition=[IN($17, _UTF-16LE'orchid', _UTF-16LE'chiffon', _UTF-16LE'lace')]) + HiveFilter(condition=[IN($17, _UTF-16LE'orchid':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chiffon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'lace':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], 
ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -8)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out index b3ae965061..ede1ffa36d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out @@ -149,7 +149,7 @@ HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_ HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$3], (tok_table_or_col d_moy)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[$6], rank_window_1=[$7]) - HiveFilter(condition=[AND(=($3, 2000), >($6, 0:DECIMAL(1, 0)), CASE(>($6, 0:DECIMAL(1, 0)), >(/(ABS(-($5, $6)), $6), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($7))]) + HiveFilter(condition=[AND(=($3, 2000), >($6, 0), CASE(>($6, 0), >(/(ABS(-($5, $6)), $6), 0.1:DECIMAL(1, 1)), false), IS NOT NULL($7))]) HiveProject((tok_table_or_col i_category)=[$4], (tok_table_or_col i_brand)=[$3], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$0], (tok_table_or_col d_moy)=[$1], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[avg($5) OVER (PARTITION BY $4, $3, $2, $0 ORDER BY $4 NULLS FIRST, $3 NULLS FIRST, $2 NULLS FIRST, $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $4, $3, $2 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(d_year=[$0], d_moy=[$1], cc_name=[$2], i_brand=[$3], i_category=[$4], $f5=[$5]) HiveAggregate(group=[{5, 6, 8, 10, 11}], agg#0=[sum($3)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out index f26eae0acc..650b7c068a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out @@ -142,8 +142,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(item_id=[$0], ss_item_rev=[$3], ss_dev=[*(/(/($3, +(+($3, $1), $5)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($3, $1), $5)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$5], ws_dev=[*(/(/($5, +(+($3, $1), $5)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($3, $1), $5), 3:DECIMAL(10, 0))]) - HiveJoin(condition=[AND(=($0, $4), BETWEEN(false, $3, $6, $7), BETWEEN(false, $1, $6, $7), BETWEEN(false, $5, *(0.9:DECIMAL(2, 1), $3), *(1.1:DECIMAL(2, 1), $3)), BETWEEN(false, $5, *(0.9:DECIMAL(2, 1), $1), *(1.1:DECIMAL(2, 1), $1)))], 
joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(2, 1), $1), *(1.1:DECIMAL(2, 1), $1)), BETWEEN(false, $1, *(0.9:DECIMAL(2, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($0, $4), BETWEEN(false, $3, $6, $7), BETWEEN(false, $1, $6, $7), BETWEEN(false, $5, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)), BETWEEN(false, $5, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{7}], agg#0=[sum($2)]) HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -202,7 +202,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(item_id=[$0], ws_item_rev=[$1], *=[*(0.9:DECIMAL(2, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) + HiveProject(item_id=[$0], ws_item_rev=[$1], *=[*(0.9:DECIMAL(1, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) HiveAggregate(group=[{7}], agg#0=[sum($2)]) HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out index 51f2ad97c6..90e147a0ad 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out @@ -185,7 +185,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_addr_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -6)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) @@ -207,7 +207,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], cs_sold_date_sk=[$1], cs_bill_addr_sk=[$2], cs_item_sk=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -6)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) @@ -229,7 +229,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], 
dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_bill_addr_sk=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -6)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out index b95ccd9ea4..b684fb590a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out @@ -112,7 +112,7 @@ HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CA HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -7:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_promo_sk=[$4], ss_ext_sales_price=[$5], d_date_sk=[$6], i_item_sk=[$7], s_store_sk=[$8], p_promo_sk=[$9]) HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -129,7 +129,7 @@ HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CA HiveFilter(condition=[=($12, _UTF-16LE'Electronics')]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($27, -7:DECIMAL(1, 0))]) + HiveFilter(condition=[=($27, -7)]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(p_promo_sk=[$0]) HiveFilter(condition=[OR(=($9, _UTF-16LE'Y'), =($11, _UTF-16LE'Y'), =($8, _UTF-16LE'Y'))]) @@ -142,7 +142,7 @@ HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CA HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -7:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], s_store_sk=[$7]) HiveJoin(condition=[=($3, $7)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -158,6 +158,6 @@ HiveProject(promotions=[$0], total=[$1], _o__c2=[*(/(CAST($0):DECIMAL(15, 4), CA HiveFilter(condition=[=($12, _UTF-16LE'Electronics')]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($27, -7:DECIMAL(1, 0))]) + HiveFilter(condition=[=($27, -7)]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query63.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query63.q.out index 0cecebb32d..4dddf0b131 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query63.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query63.q.out @@ -67,7 +67,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$2], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject((tok_table_or_col i_manager_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$1], avg_window_0=[$2]) - HiveFilter(condition=[CASE(>($2, 0:DECIMAL(1, 0)), >(/(ABS(-($1, $2)), $2), 0.1:DECIMAL(2, 1)), false)]) + HiveFilter(condition=[CASE(>($2, 0), >(/(ABS(-($1, $2)), $2), 0.1:DECIMAL(1, 1)), false)]) HiveProject((tok_table_or_col i_manager_id)=[$0], (tok_function sum (tok_table_or_col ss_sales_price))=[$2], avg_window_0=[avg($2) OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(i_manager_id=[$0], d_moy=[$1], $f2=[$2]) HiveAggregate(group=[{4, 6}], agg#0=[sum($2)]) @@ -77,7 +77,7 @@ HiveSortLimit(sort0=[$0], sort1=[$2], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_manager_id=[$20]) - HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))))]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'portable':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'refernece':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'self-help':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'accessories':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'classical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'fragrances':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'scholaramalgamalg #14':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #7':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiunivamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'amalgimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'edu packscholar #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'importoamalg #1':VARCHAR(2147483647) 
CHARACTER SET "UTF-16LE"), IN($12, _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Children':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Women':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Music':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), OR(AND(IN($12, _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Children':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'personal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'portable':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'refernece':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'self-help':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'scholaramalgamalg #14':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #7':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiunivamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'scholaramalgamalg #9':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(IN($12, _UTF-16LE'Women':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Music':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'accessories':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'classical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'fragrances':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($8, _UTF-16LE'amalgimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'edu packscholar #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'exportiimporto #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'importoamalg #1':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0], d_moy=[$8]) HiveFilter(condition=[IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out index 486aef4f1c..c1453288df 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out @@ -294,7 +294,7 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'))]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'burnished':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'dim':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'steel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'navajo':VARCHAR(2147483647) CHARACTER 
SET "UTF-16LE", _UTF-16LE'chocolate':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2001)]) @@ -355,7 +355,7 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'))]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'burnished':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'dim':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'steel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'navajo':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'chocolate':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2000)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out index af746307b7..b161723448 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out @@ -83,7 +83,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], *=[*(0.1:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(21, 6))]) + HiveProject($f0=[$0], *=[*(0.1:DECIMAL(1, 1), CAST(/($1, $2)):DECIMAL(21, 6))]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) HiveProject(ss_item_sk=[$0], ss_store_sk=[$1], $f2=[$2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out index 6e4ab9b627..2353cd9426 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out @@ -480,7 +480,7 @@ HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(sm_ship_mode_sk=[$0]) - HiveFilter(condition=[IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE')]) + HiveFilter(condition=[IN($4, _UTF-16LE'DIAMOND':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'AIRBORNE':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], 
$f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29]) HiveAggregate(group=[{0, 1, 2, 3, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)], agg#3=[sum($9)], agg#4=[sum($10)], agg#5=[sum($11)], agg#6=[sum($12)], agg#7=[sum($13)], agg#8=[sum($14)], agg#9=[sum($15)], agg#10=[sum($16)], agg#11=[sum($17)], agg#12=[sum($18)], agg#13=[sum($19)], agg#14=[sum($20)], agg#15=[sum($21)], agg#16=[sum($22)], agg#17=[sum($23)], agg#18=[sum($24)], agg#19=[sum($25)], agg#20=[sum($26)], agg#21=[sum($27)], agg#22=[sum($28)], agg#23=[sum($29)]) @@ -499,7 +499,7 @@ HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(sm_ship_mode_sk=[$0]) - HiveFilter(condition=[IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE')]) + HiveFilter(condition=[IN($4, _UTF-16LE'DIAMOND':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'AIRBORNE':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out index c1605999a8..63c776304c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out @@ -119,7 +119,7 @@ HiveSortLimit(sort0=[$0], sort1=[$4], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 1998, 1999, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood')]) + HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Wildwood':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[OR(=($4, 1), =($3, 2))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out index 8cb93b4271..78d36513ef 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out @@ -122,7 +122,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], dir0=[ HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[c]) HiveProject(ca_address_sk=[$0], ca_state=[$8]) - HiveFilter(condition=[IN($8, _UTF-16LE'CO', _UTF-16LE'IL', _UTF-16LE'MN')]) + HiveFilter(condition=[IN($8, _UTF-16LE'CO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IL':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) diff --git 
a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out index bcd25b3254..87c500132d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out @@ -95,7 +95,7 @@ HiveProject(brand_id=[$0], brand=[$1], t_hour=[$2], t_minute=[$3], ext_price=[$4 HiveAggregate(group=[{1, 2, 7, 8}], agg#0=[sum($3)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(t_time_sk=[$0], t_hour=[$3], t_minute=[$4]) - HiveFilter(condition=[IN($9, _UTF-16LE'breakfast', _UTF-16LE'dinner')]) + HiveFilter(condition=[IN($9, _UTF-16LE'breakfast':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'dinner':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out index cb95169697..e5fc58aa55 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out @@ -84,9 +84,9 @@ HiveSortLimit(sort0=[$5], dir0=[DESC-nulls-last]) HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 2000, 2001, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(>($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1), false), IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'))]) + HiveFilter(condition=[AND(>($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1), false), IN($2, _UTF-16LE'>10000':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'unknown':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County')]) + HiveFilter(condition=[IN($23, _UTF-16LE'Mobile County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Maverick County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Kittitas County':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out index 6d983afc31..ee232fa4e3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out @@ -159,7 +159,7 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) - HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($3, 0)]) 
HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) @@ -172,7 +172,7 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f4=[$3]) - HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($3, 0)]) HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out index 82f65782a6..3dac050924 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out @@ -158,7 +158,7 @@ CBO PLAN: HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$5], sales_cnt_diff=[$6], sales_amt_diff=[$7]) HiveSortLimit(sort0=[$6], dir0=[ASC], fetch=[100]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$10], sales_cnt_diff=[-($10, $4)], sales_amt_diff=[-($11, $5)]) - HiveJoin(condition=[AND(=($6, $0), =($7, $1), =($8, $2), =($9, $3), <(/(CAST($10):DECIMAL(17, 2), CAST($4):DECIMAL(17, 2)), 0.9:DECIMAL(2, 1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($6, $0), =($7, $1), =($8, $2), =($9, $3), <(/(CAST($10):DECIMAL(17, 2), CAST($4):DECIMAL(17, 2)), 0.9:DECIMAL(1, 1)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query8.q.out index aa1b4d9e2b..bd9e6fb47e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query8.q.out @@ -246,7 +246,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(ca_zip=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject(ca_zip=[substr($9, 1, 5)]) - HiveFilter(condition=[AND(IN(substr($9, 1, 5), _UTF-16LE'89436', _UTF-16LE'30868', _UTF-16LE'65085', _UTF-16LE'22977', _UTF-16LE'83927', _UTF-16LE'77557', _UTF-16LE'58429', _UTF-16LE'40697', _UTF-16LE'80614', _UTF-16LE'10502', _UTF-16LE'32779', _UTF-16LE'91137', _UTF-16LE'61265', _UTF-16LE'98294', _UTF-16LE'17921', _UTF-16LE'18427', _UTF-16LE'21203', _UTF-16LE'59362', _UTF-16LE'87291', _UTF-16LE'84093', _UTF-16LE'21505', _UTF-16LE'17184', _UTF-16LE'10866', _UTF-16LE'67898', _UTF-16LE'25797', _UTF-16LE'28055', _UTF-16LE'18377', _UTF-16LE'80332', _UTF-16LE'74535', _UTF-16LE'21757', _UTF-16LE'29742', _UTF-16LE'90885', _UTF-16LE'29898', 
_UTF-16LE'17819', _UTF-16LE'40811', _UTF-16LE'25990', _UTF-16LE'47513', _UTF-16LE'89531', _UTF-16LE'91068', _UTF-16LE'10391', _UTF-16LE'18846', _UTF-16LE'99223', _UTF-16LE'82637', _UTF-16LE'41368', _UTF-16LE'83658', _UTF-16LE'86199', _UTF-16LE'81625', _UTF-16LE'26696', _UTF-16LE'89338', _UTF-16LE'88425', _UTF-16LE'32200', _UTF-16LE'81427', _UTF-16LE'19053', _UTF-16LE'77471', _UTF-16LE'36610', _UTF-16LE'99823', _UTF-16LE'43276', _UTF-16LE'41249', _UTF-16LE'48584', _UTF-16LE'83550', _UTF-16LE'82276', _UTF-16LE'18842', _UTF-16LE'78890', _UTF-16LE'14090', _UTF-16LE'38123', _UTF-16LE'40936', _UTF-16LE'34425', _UTF-16LE'19850', _UTF-16LE'43286', _UTF-16LE'80072', _UTF-16LE'79188', _UTF-16LE'54191', _UTF-16LE'11395', _UTF-16LE'50497', _UTF-16LE'84861', _UTF-16LE'90733', _UTF-16LE'21068', _UTF-16LE'57666', _UTF-16LE'37119', _UTF-16LE'25004', _UTF-16LE'57835', _UTF-16LE'70067', _UTF-16LE'62878', _UTF-16LE'95806', _UTF-16LE'19303', _UTF-16LE'18840', _UTF-16LE'19124', _UTF-16LE'29785', _UTF-16LE'16737', _UTF-16LE'16022', _UTF-16LE'49613', _UTF-16LE'89977', _UTF-16LE'68310', _UTF-16LE'60069', _UTF-16LE'98360', _UTF-16LE'48649', _UTF-16LE'39050', _UTF-16LE'41793', _UTF-16LE'25002', _UTF-16LE'27413', _UTF-16LE'39736', _UTF-16LE'47208', _UTF-16LE'16515', _UTF-16LE'94808', _UTF-16LE'57648', _UTF-16LE'15009', _UTF-16LE'80015', _UTF-16LE'42961', _UTF-16LE'63982', _UTF-16LE'21744', _UTF-16LE'71853', _UTF-16LE'81087', _UTF-16LE'67468', _UTF-16LE'34175', _UTF-16LE'64008', _UTF-16LE'20261', _UTF-16LE'11201', _UTF-16LE'51799', _UTF-16LE'48043', _UTF-16LE'45645', _UTF-16LE'61163', _UTF-16LE'48375', _UTF-16LE'36447', _UTF-16LE'57042', _UTF-16LE'21218', _UTF-16LE'41100', _UTF-16LE'89951', _UTF-16LE'22745', _UTF-16LE'35851', _UTF-16LE'83326', _UTF-16LE'61125', _UTF-16LE'78298', _UTF-16LE'80752', _UTF-16LE'49858', _UTF-16LE'52940', _UTF-16LE'96976', _UTF-16LE'63792', _UTF-16LE'11376', _UTF-16LE'53582', _UTF-16LE'18717', _UTF-16LE'90226', _UTF-16LE'50530', _UTF-16LE'94203', _UTF-16LE'99447', _UTF-16LE'27670', _UTF-16LE'96577', _UTF-16LE'57856', _UTF-16LE'56372', _UTF-16LE'16165', _UTF-16LE'23427', _UTF-16LE'54561', _UTF-16LE'28806', _UTF-16LE'44439', _UTF-16LE'22926', _UTF-16LE'30123', _UTF-16LE'61451', _UTF-16LE'92397', _UTF-16LE'56979', _UTF-16LE'92309', _UTF-16LE'70873', _UTF-16LE'13355', _UTF-16LE'21801', _UTF-16LE'46346', _UTF-16LE'37562', _UTF-16LE'56458', _UTF-16LE'28286', _UTF-16LE'47306', _UTF-16LE'99555', _UTF-16LE'69399', _UTF-16LE'26234', _UTF-16LE'47546', _UTF-16LE'49661', _UTF-16LE'88601', _UTF-16LE'35943', _UTF-16LE'39936', _UTF-16LE'25632', _UTF-16LE'24611', _UTF-16LE'44166', _UTF-16LE'56648', _UTF-16LE'30379', _UTF-16LE'59785', _UTF-16LE'11110', _UTF-16LE'14329', _UTF-16LE'93815', _UTF-16LE'52226', _UTF-16LE'71381', _UTF-16LE'13842', _UTF-16LE'25612', _UTF-16LE'63294', _UTF-16LE'14664', _UTF-16LE'21077', _UTF-16LE'82626', _UTF-16LE'18799', _UTF-16LE'60915', _UTF-16LE'81020', _UTF-16LE'56447', _UTF-16LE'76619', _UTF-16LE'11433', _UTF-16LE'13414', _UTF-16LE'42548', _UTF-16LE'92713', _UTF-16LE'70467', _UTF-16LE'30884', _UTF-16LE'47484', _UTF-16LE'16072', _UTF-16LE'38936', _UTF-16LE'13036', _UTF-16LE'88376', _UTF-16LE'45539', _UTF-16LE'35901', _UTF-16LE'19506', _UTF-16LE'65690', _UTF-16LE'73957', _UTF-16LE'71850', _UTF-16LE'49231', _UTF-16LE'14276', _UTF-16LE'20005', _UTF-16LE'18384', _UTF-16LE'76615', _UTF-16LE'11635', _UTF-16LE'38177', _UTF-16LE'55607', _UTF-16LE'41369', _UTF-16LE'95447', _UTF-16LE'58581', _UTF-16LE'58149', _UTF-16LE'91946', _UTF-16LE'33790', _UTF-16LE'76232', _UTF-16LE'75692', 
_UTF-16LE'95464', _UTF-16LE'22246', _UTF-16LE'51061', _UTF-16LE'56692', _UTF-16LE'53121', _UTF-16LE'77209', _UTF-16LE'15482', _UTF-16LE'10688', _UTF-16LE'14868', _UTF-16LE'45907', _UTF-16LE'73520', _UTF-16LE'72666', _UTF-16LE'25734', _UTF-16LE'17959', _UTF-16LE'24677', _UTF-16LE'66446', _UTF-16LE'94627', _UTF-16LE'53535', _UTF-16LE'15560', _UTF-16LE'41967', _UTF-16LE'69297', _UTF-16LE'11929', _UTF-16LE'59403', _UTF-16LE'33283', _UTF-16LE'52232', _UTF-16LE'57350', _UTF-16LE'43933', _UTF-16LE'40921', _UTF-16LE'36635', _UTF-16LE'10827', _UTF-16LE'71286', _UTF-16LE'19736', _UTF-16LE'80619', _UTF-16LE'25251', _UTF-16LE'95042', _UTF-16LE'15526', _UTF-16LE'36496', _UTF-16LE'55854', _UTF-16LE'49124', _UTF-16LE'81980', _UTF-16LE'35375', _UTF-16LE'49157', _UTF-16LE'63512', _UTF-16LE'28944', _UTF-16LE'14946', _UTF-16LE'36503', _UTF-16LE'54010', _UTF-16LE'18767', _UTF-16LE'23969', _UTF-16LE'43905', _UTF-16LE'66979', _UTF-16LE'33113', _UTF-16LE'21286', _UTF-16LE'58471', _UTF-16LE'59080', _UTF-16LE'13395', _UTF-16LE'79144', _UTF-16LE'70373', _UTF-16LE'67031', _UTF-16LE'38360', _UTF-16LE'26705', _UTF-16LE'50906', _UTF-16LE'52406', _UTF-16LE'26066', _UTF-16LE'73146', _UTF-16LE'15884', _UTF-16LE'31897', _UTF-16LE'30045', _UTF-16LE'61068', _UTF-16LE'45550', _UTF-16LE'92454', _UTF-16LE'13376', _UTF-16LE'14354', _UTF-16LE'19770', _UTF-16LE'22928', _UTF-16LE'97790', _UTF-16LE'50723', _UTF-16LE'46081', _UTF-16LE'30202', _UTF-16LE'14410', _UTF-16LE'20223', _UTF-16LE'88500', _UTF-16LE'67298', _UTF-16LE'13261', _UTF-16LE'14172', _UTF-16LE'81410', _UTF-16LE'93578', _UTF-16LE'83583', _UTF-16LE'46047', _UTF-16LE'94167', _UTF-16LE'82564', _UTF-16LE'21156', _UTF-16LE'15799', _UTF-16LE'86709', _UTF-16LE'37931', _UTF-16LE'74703', _UTF-16LE'83103', _UTF-16LE'23054', _UTF-16LE'70470', _UTF-16LE'72008', _UTF-16LE'49247', _UTF-16LE'91911', _UTF-16LE'69998', _UTF-16LE'20961', _UTF-16LE'70070', _UTF-16LE'63197', _UTF-16LE'54853', _UTF-16LE'88191', _UTF-16LE'91830', _UTF-16LE'49521', _UTF-16LE'19454', _UTF-16LE'81450', _UTF-16LE'89091', _UTF-16LE'62378', _UTF-16LE'25683', _UTF-16LE'61869', _UTF-16LE'51744', _UTF-16LE'36580', _UTF-16LE'85778', _UTF-16LE'36871', _UTF-16LE'48121', _UTF-16LE'28810', _UTF-16LE'83712', _UTF-16LE'45486', _UTF-16LE'67393', _UTF-16LE'26935', _UTF-16LE'42393', _UTF-16LE'20132', _UTF-16LE'55349', _UTF-16LE'86057', _UTF-16LE'21309', _UTF-16LE'80218', _UTF-16LE'10094', _UTF-16LE'11357', _UTF-16LE'48819', _UTF-16LE'39734', _UTF-16LE'40758', _UTF-16LE'30432', _UTF-16LE'21204', _UTF-16LE'29467', _UTF-16LE'30214', _UTF-16LE'61024', _UTF-16LE'55307', _UTF-16LE'74621', _UTF-16LE'11622', _UTF-16LE'68908', _UTF-16LE'33032', _UTF-16LE'52868', _UTF-16LE'99194', _UTF-16LE'99900', _UTF-16LE'84936', _UTF-16LE'69036', _UTF-16LE'99149', _UTF-16LE'45013', _UTF-16LE'32895', _UTF-16LE'59004', _UTF-16LE'32322', _UTF-16LE'14933', _UTF-16LE'32936', _UTF-16LE'33562', _UTF-16LE'72550', _UTF-16LE'27385', _UTF-16LE'58049', _UTF-16LE'58200', _UTF-16LE'16808', _UTF-16LE'21360', _UTF-16LE'32961', _UTF-16LE'18586', _UTF-16LE'79307', _UTF-16LE'15492'), IS NOT NULL(substr(substr($9, 1, 5), 1, 2)))]) + HiveFilter(condition=[AND(IN(substr($9, 1, 5), _UTF-16LE'89436':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30868':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'65085':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22977':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83927':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'77557':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'58429':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40697':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80614':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10502':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32779':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91137':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61265':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'98294':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'17921':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18427':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21203':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59362':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'87291':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'84093':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21505':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'17184':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10866':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67898':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25797':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28055':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18377':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80332':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'74535':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21757':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'29742':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'90885':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'29898':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'17819':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40811':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25990':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47513':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89531':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91068':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10391':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18846':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99223':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'82637':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41368':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83658':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86199':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81625':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26696':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89338':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88425':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32200':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81427':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19053':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'77471':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36610':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99823':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'43276':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41249':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48584':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83550':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'82276':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18842':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'78890':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14090':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'38123':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'34425':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19850':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'43286':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80072':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'79188':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'54191':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11395':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'50497':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'84861':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'90733':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21068':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57666':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'37119':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25004':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57835':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70067':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'62878':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'95806':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19303':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18840':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19124':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'29785':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16737':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16022':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49613':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89977':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'68310':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'60069':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'98360':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48649':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'39050':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41793':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25002':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'27413':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'39736':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47208':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16515':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'94808':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57648':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15009':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80015':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'42961':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63982':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21744':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'71853':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81087':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67468':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'34175':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'64008':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20261':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11201':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'51799':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'48043':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45645':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61163':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48375':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36447':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57042':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21218':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41100':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'89951':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22745':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'35851':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83326':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61125':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'78298':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80752':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49858':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52940':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'96976':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63792':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11376':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'53582':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18717':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'90226':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'50530':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'94203':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99447':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'27670':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'96577':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57856':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56372':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16165':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'23427':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'54561':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28806':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'44439':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22926':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30123':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61451':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'92397':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56979':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'92309':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70873':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13355':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21801':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'46346':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'37562':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56458':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28286':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47306':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99555':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'69399':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26234':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47546':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49661':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88601':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'35943':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'39936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25632':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'24611':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'44166':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56648':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30379':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59785':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11110':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14329':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'93815':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52226':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'71381':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13842':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25612':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63294':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14664':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21077':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'82626':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18799':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'60915':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81020':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56447':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'76619':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11433':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13414':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'42548':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'92713':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70467':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30884':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'47484':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16072':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'38936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13036':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88376':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45539':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'35901':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19506':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'65690':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'73957':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'71850':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49231':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14276':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20005':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18384':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'76615':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11635':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'38177':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'55607':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41369':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'95447':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58581':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58149':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91946':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33790':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'76232':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'75692':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'95464':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22246':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'51061':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'56692':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'53121':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'77209':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15482':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10688':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14868':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45907':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'73520':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'72666':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25734':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'17959':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'24677':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'66446':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'94627':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'53535':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15560':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'41967':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'69297':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11929':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59403':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33283':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52232':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'57350':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'43933':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40921':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36635':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10827':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'71286':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19736':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80619':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25251':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'95042':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15526':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36496':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'55854':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49124':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81980':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'35375':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49157':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63512':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28944':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14946':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36503':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'54010':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18767':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'23969':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'43905':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'66979':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33113':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21286':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58471':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59080':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13395':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'79144':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70373':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67031':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'38360':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26705':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'50906':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52406':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26066':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'73146':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15884':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'31897':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30045':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61068':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45550':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'92454':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13376':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14354':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19770':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'22928':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'97790':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'50723':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'46081':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30202':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14410':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20223':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88500':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67298':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'13261':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14172':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81410':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'93578':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83583':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'46047':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'94167':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'82564':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21156':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15799':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86709':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'37931':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'74703':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83103':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'23054':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70470':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'72008':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49247':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91911':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'69998':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20961':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'70070':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'63197':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'54853':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88191':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'91830':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'49521':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'19454':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'81450':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'89091':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'62378':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'25683':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61869':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'51744':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36580':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85778':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'36871':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48121':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'28810':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83712':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45486':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'67393':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'26935':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'42393':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'20132':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'55349':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86057':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21309':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'80218':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'10094':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11357':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'48819':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'39734':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'40758':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30432':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21204':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'29467':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'30214':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'61024':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'55307':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'74621':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'11622':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'68908':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33032':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'52868':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99194':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99900':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'84936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'69036':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'99149':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'45013':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32895':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'59004':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32322':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'14933':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32936':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'33562':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'72550':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'27385':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58049':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'58200':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'16808':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'21360':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'32961':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'18586':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'79307':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'15492':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL(substr(substr($9, 1, 5), 1, 2)))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(ca_zip=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out index fa6a0e8163..581afc4d6c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out @@ -229,7 +229,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveFilter(condition=[>($5, 50)]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) @@ -253,7 +253,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveFilter(condition=[>($5, 50)]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($2, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) @@ -278,7 +278,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveFilter(condition=[>($5, 50)]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out index fe05a6e300..0c7c3e1871 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -165,7 +165,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-10-15':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'1998-11-10':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) @@ -187,7 +187,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-10-15':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-11-10':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) @@ -209,7 +209,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-10-15':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'1998-11-10':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out index 34320030ff..0e313b72bd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out @@ -199,17 +199,17 @@ HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveProject(cd_demo_sk=[$0], cd_marital_status=[$1], cd_education_status=[$2], ==[$3], =4=[$4], =5=[$5], =6=[$6], =7=[$7], =8=[$8], wr_item_sk=[$9], wr_refunded_cdemo_sk=[$10], wr_refunded_addr_sk=[$11], wr_returning_cdemo_sk=[$12], wr_reason_sk=[$13], wr_order_number=[$14], wr_fee=[$15], wr_refunded_cash=[$16], ca_address_sk=[$17], IN=[$18], IN2=[$19], IN3=[$20], cd_demo_sk0=[$21], cd_marital_status0=[$22], cd_education_status0=[$23]) HiveJoin(condition=[AND(=($1, $22), =($2, $23), =($0, $10))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'D':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($3, _UTF-16LE'4 yr 
Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Primary':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) HiveJoin(condition=[=($12, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($8), IS NOT NULL($4), IS NOT NULL($12))]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN2=[IN($8, _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")], IN3=[IN($8, _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'GA':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'NM':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MT':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'OR':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'IN':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WI':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'MO':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'WV':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), =($10, _UTF-16LE'United States'))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'D':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'U':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($3, _UTF-16LE'4 yr Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Primary':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out index 71e9f362df..64726a2a79 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out @@ -66,7 +66,7 @@ CBO PLAN: HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7]) HiveSortLimit(sort0=[$8], sort1=[$3], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($6, $7)]) - HiveFilter(condition=[CASE(<>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(2, 1)), false)]) + HiveFilter(condition=[CASE(<>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(1, 1)), false)]) HiveProject((tok_table_or_col i_category)=[$2], (tok_table_or_col i_class)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $2, $0, $4, $5 ORDER BY $2 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) HiveProject(i_brand=[$0], i_class=[$1], i_category=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) HiveAggregate(group=[{5, 6, 7, 9, 11, 12}], agg#0=[sum($3)]) @@ -77,7 +77,7 @@ HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_co HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[AND(IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical', _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'), IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics', _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical')), AND(IN($12, _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), IN($10, _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'))))]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'wallpaper':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'parenting':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'musical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'womens':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'birdal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($12, _UTF-16LE'Home':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Shoes':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), OR(AND(IN($12, _UTF-16LE'Home':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Electronics':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'wallpaper':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'parenting':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 
_UTF-16LE'musical':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(IN($12, _UTF-16LE'Shoes':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Men':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($10, _UTF-16LE'womens':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'birdal':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'pants':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0], d_moy=[$8]) HiveFilter(condition=[=($6, 2000)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out index e19d1ca301..492d6ccb93 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out @@ -84,14 +84,14 @@ HiveProject(call_center=[$0], call_center_name=[$1], manager=[$2], returns_loss= HiveJoin(condition=[=($9, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($11, -7:DECIMAL(1, 0))]) + HiveFilter(condition=[=($11, -7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')))]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'W':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN($3, _UTF-16LE'Unknown':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), IN(ROW($2, $3), ROW(_UTF-16LE'M':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Unknown':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), ROW(_UTF-16LE'W':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Advanced Degree':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_call_center_sk=[$2], cr_net_loss=[$3], d_date_sk=[$4]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out index 3df0e3dbe7..42db0d6e2a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out @@ -75,7 +75,7 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) HiveJoin(condition=[=($7, $0)], 
joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) + HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Sports':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'Books':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out index 51c1539acd..a60769c6a5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out @@ -1,3 +1,4 @@ +Warning: Shuffle Join MERGEJOIN[132][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales, customer, customer_address, date_dim, item @@ -51,133 +52,152 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 11 <- Reducer 14 (BROADCAST_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Map 14 <- Reducer 17 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 1 (SIMPLE_EDGE) -Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 vectorized - File Output Operator [FS_154] - Limit [LIM_153] (rows=100 width=299) + Reducer 6 vectorized + File Output Operator [FS_173] + Limit [LIM_172] (rows=100 width=299) Number of rows:100 - Select Operator [SEL_152] (rows=285780 width=299) + Select Operator [SEL_171] (rows=285780 width=299) Output:["_col0","_col1","_col2"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - Group By Operator [GBY_150] (rows=285780 width=299) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_170] + Group By Operator [GBY_169] (rows=285780 width=299) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_41] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_51] PartitionCols:_col0, _col1 - Group By Operator [GBY_40] (rows=3715140 width=299) + Group By Operator [GBY_50] (rows=3715140 width=299) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col7 - Select Operator [SEL_39] (rows=10246864 
width=302) + Select Operator [SEL_49] (rows=10246864 width=310) Output:["_col3","_col7","_col8"] - Top N Key Operator [TNK_72] (rows=10246864 width=302) + Top N Key Operator [TNK_84] (rows=10246864 width=310) keys:_col8, _col7,top n:100 - Filter Operator [FIL_38] (rows=10246864 width=302) - predicate:(_col15 is not null or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) - Select Operator [SEL_37] (rows=10246864 width=302) - Output:["_col3","_col7","_col8","_col15"] - Merge Join Operator [MERGEJOIN_124] (rows=10246864 width=302) - Conds:RS_34._col0=RS_35._col6(Inner),Output:["_col3","_col7","_col8","_col12"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_120] (rows=462007 width=4) - Conds:RS_127._col1=RS_133._col0(Left Outer),Output:["_col0","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_127] - PartitionCols:_col1 - Select Operator [SEL_125] (rows=462000 width=104) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=462000 width=104) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] - PartitionCols:_col0 - Select Operator [SEL_132] (rows=5 width=104) - Output:["_col0","_col1"] - Group By Operator [GBY_131] (rows=5 width=100) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] - PartitionCols:_col0 - Group By Operator [GBY_129] (rows=5 width=100) - Output:["_col0"],keys:i_item_id - Select Operator [SEL_128] (rows=11 width=104) - Output:["i_item_id"] - Filter Operator [FIL_126] (rows=11 width=104) - predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) - Please refer to the previous TableScan [TS_0] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_35] + Filter Operator [FIL_48] (rows=10246864 width=310) + predicate:(((_col14 <> 0L) and _col16 is not null) or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) + Select Operator [SEL_47] (rows=10246864 width=310) + Output:["_col3","_col7","_col8","_col14","_col16"] + Merge Join Operator [MERGEJOIN_137] (rows=10246864 width=310) + Conds:RS_44._col0=RS_45._col6(Inner),Output:["_col2","_col4","_col8","_col9","_col13"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_45] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_123] (rows=10246864 width=302) - Conds:RS_27._col0=RS_28._col2(Inner),Output:["_col3","_col4","_col6","_col8"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_122] (rows=10246864 width=119) - Conds:RS_149._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_136] (rows=10246864 width=302) + Conds:RS_34._col0=RS_35._col2(Inner),Output:["_col3","_col4","_col6","_col8"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_133] (rows=80000000 width=191) + Conds:RS_155._col1=RS_157._col0(Inner),Output:["_col0","_col3","_col4"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_155] + PartitionCols:_col1 + Select Operator [SEL_154] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_153] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_16] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_141] + SHUFFLE [RS_157] + 
PartitionCols:_col0 + Select Operator [SEL_156] (rows=40000000 width=191) + Output:["_col0","_col1","_col2"] + TableScan [TS_19] (rows=40000000 width=191) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_zip"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_134] (rows=10246864 width=119) + Conds:RS_168._col0=RS_160._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_160] PartitionCols:_col0 - Select Operator [SEL_140] (rows=130 width=12) + Select Operator [SEL_159] (rows=130 width=12) Output:["_col0"] - Filter Operator [FIL_139] (rows=130 width=12) + Filter Operator [FIL_158] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2)) - TableScan [TS_17] (rows=73049 width=12) + TableScan [TS_24] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_168] PartitionCols:_col0 - Select Operator [SEL_148] (rows=143930993 width=123) + Select Operator [SEL_167] (rows=143930993 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_147] (rows=143930993 width=123) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_14] (rows=144002668 width=123) + Filter Operator [FIL_166] (rows=143930993 width=123) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_21] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_146] - Group By Operator [GBY_145] (rows=1 width=12) + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_165] + Group By Operator [GBY_164] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_163] + Group By Operator [GBY_162] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_142] (rows=130 width=4) + Select Operator [SEL_161] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_140] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_121] (rows=80000000 width=191) - Conds:RS_136._col1=RS_138._col0(Inner),Output:["_col0","_col3","_col4"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - PartitionCols:_col0 - Select Operator [SEL_137] (rows=40000000 width=191) - Output:["_col0","_col1","_col2"] - TableScan [TS_12] (rows=40000000 width=191) - 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_zip"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] - PartitionCols:_col1 - Select Operator [SEL_135] (rows=80000000 width=8) + Please refer to the previous Select Operator [SEL_159] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_135] (rows=462007 width=12) + Conds:RS_41._col1=RS_152._col0(Left Outer),Output:["_col0","_col2","_col4"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_132] (rows=462000 width=112) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] + Select Operator [SEL_138] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_134] (rows=80000000 width=8) - predicate:c_current_addr_sk is not null - TableScan [TS_9] (rows=80000000 width=8) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_149] + Group By Operator [GBY_148] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_147] + Group By Operator [GBY_146] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_145] (rows=11 width=4) + Filter Operator [FIL_144] (rows=11 width=4) + predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + TableScan [TS_2] (rows=462000 width=4) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=5 width=104) + Output:["_col0","_col1"] + Group By Operator [GBY_150] (rows=5 width=100) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + PartitionCols:_col0 + Group By Operator [GBY_142] (rows=5 width=100) + Output:["_col0"],keys:i_item_id + Select Operator [SEL_141] (rows=11 width=104) + Output:["i_item_id"] + Filter Operator [FIL_139] (rows=11 width=104) + predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + Please refer to the previous TableScan [TS_0] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out index 9d3505f8e7..aa85143704 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out @@ -180,7 +180,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_72] (rows=2570826 width=12) + Merge Join Operator [MERGEJOIN_72] (rows=2570825 width=12) Conds:RS_15._col1=RS_87._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_87] @@ -194,7 +194,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_71] (rows=57024544 width=22) + Merge Join Operator [MERGEJOIN_71] (rows=57024543 width=22) Conds:RS_84._col0=RS_76._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Map 6 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_76] @@ -208,10 +208,10 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE 
[RS_84] PartitionCols:_col0 - Select Operator [SEL_83] (rows=159705894 width=27) + Select Operator [SEL_83] (rows=159705891 width=27) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_82] (rows=159705894 width=233) - predicate:((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 0) or (ss_net_profit <= 2000) or (ss_net_profit >= 150) or (ss_net_profit <= 3000) or (ss_net_profit >= 50) or (ss_net_profit <= 25000)) and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + Filter Operator [FIL_82] (rows=159705891 width=233) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_store_sk is not null and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 0) or (ss_net_profit <= 2000) or (ss_net_profit >= 150) or (ss_net_profit <= 3000) or (ss_net_profit >= 50) or (ss_net_profit <= 25000)) and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=233) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] <-Reducer 7 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out index c232f1713a..354771e75a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -193,7 +193,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_94] (rows=2570826 width=12) + Merge Join Operator [MERGEJOIN_94] (rows=2570825 width=12) Conds:RS_18._col1=RS_110._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_110] @@ -207,7 +207,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_93] (rows=57024544 width=25) + Merge Join Operator [MERGEJOIN_93] (rows=57024543 width=25) Conds:RS_107._col0=RS_99._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_99] @@ -221,10 +221,10 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_106] (rows=159705894 width=31) + Select Operator [SEL_106] (rows=159705891 width=31) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_105] (rows=159705894 width=233) - predicate:((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 
50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 0) or (ss_net_profit <= 2000) or (ss_net_profit >= 150) or (ss_net_profit <= 3000) or (ss_net_profit >= 50) or (ss_net_profit <= 25000)) and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) + Filter Operator [FIL_105] (rows=159705891 width=233) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_store_sk is not null and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 0) or (ss_net_profit <= 2000) or (ss_net_profit >= 150) or (ss_net_profit <= 3000) or (ss_net_profit >= 50) or (ss_net_profit <= 25000)) and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=233) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] <-Reducer 8 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/ppd_gby_join.q.out index a9b7593714..51a92a7cc3 100644 --- a/ql/src/test/results/clientpositive/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_gby_join.q.out @@ -33,10 +33,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 6408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -50,10 +50,10 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ 
-307,10 +307,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 6408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -324,10 +324,10 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join.q.out b/ql/src/test/results/clientpositive/ppd_join.q.out index 1117d34abc..e6f28b846a 100644 --- a/ql/src/test/results/clientpositive/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_join.q.out @@ -30,10 +30,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 6408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -47,10 +47,10 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -560,10 +560,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) 
(type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 6408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -577,10 +577,10 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join2.q.out b/ql/src/test/results/clientpositive/ppd_join2.q.out index c09fefe225..6380392630 100644 --- a/ql/src/test/results/clientpositive/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/ppd_join2.q.out @@ -37,10 +37,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + filterExpr: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + predicate: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -55,10 +55,10 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) + filterExpr: ((key < '400') and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) + predicate: ((key < '400') and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -103,10 +103,10 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: src - filterExpr: ((key <> '306') and (sqrt(key) <> 
13.0D) and value is not null) (type: boolean) + filterExpr: ((sqrt(key) <> 13.0D) and (key <> '306') and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) + predicate: ((sqrt(key) <> 13.0D) and (key <> '306') and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) @@ -1728,10 +1728,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + filterExpr: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + predicate: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -1746,10 +1746,10 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) + filterExpr: ((key < '400') and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) + predicate: ((key < '400') and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -1794,10 +1794,10 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: src - filterExpr: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) + filterExpr: ((sqrt(key) <> 13.0D) and (key <> '306') and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) + predicate: ((sqrt(key) <> 13.0D) and (key <> '306') and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out index 71bf59fbc1..4b1f881e6b 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -37,10 +37,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: 
src - filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -54,10 +54,10 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -97,10 +97,10 @@ STAGE PLANS: Statistics: Num rows: 86 Data size: 7482 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -1783,10 +1783,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> 
'13') and (key <> '11') and (key <> '12')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -1800,10 +1800,10 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -1843,10 +1843,10 @@ STAGE PLANS: Statistics: Num rows: 86 Data size: 7482 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/ppd_join_filter.q.out index 051e676a0b..7acee17aa3 100644 --- a/ql/src/test/results/clientpositive/ppd_join_filter.q.out +++ b/ql/src/test/results/clientpositive/ppd_join_filter.q.out @@ -36,7 +36,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -386,7 +386,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 
depends on stages: Stage-2 @@ -736,7 +736,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -1084,7 +1084,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 diff --git a/ql/src/test/results/clientpositive/semijoin2.q.out b/ql/src/test/results/clientpositive/semijoin2.q.out index c011ddf7e4..2a176e65a3 100644 --- a/ql/src/test/results/clientpositive/semijoin2.q.out +++ b/ql/src/test/results/clientpositive/semijoin2.q.out @@ -174,7 +174,7 @@ STAGE PLANS: window functions: window function definition alias: LEAD_window_0 - arguments: COALESCE((- 973),(- 684),515) + arguments: COALESCE(-973,-684,515) name: LEAD window function: GenericUDAFLeadEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -222,7 +222,7 @@ STAGE PLANS: window functions: window function definition alias: SUM_window_1 - arguments: COALESCE(62,(- 380),(- 435)) + arguments: COALESCE(62,-380,-435) name: SUM window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~FOLLOWING(48) diff --git a/ql/src/test/results/clientpositive/spark/cbo_limit.q.out b/ql/src/test/results/clientpositive/spark/cbo_limit.q.out index 4ff88b71ec..2fb1ba14f2 100644 --- a/ql/src/test/results/clientpositive/spark/cbo_limit.q.out +++ b/ql/src/test/results/clientpositive/spark/cbo_limit.q.out @@ -179,7 +179,7 @@ HiveFilter(condition=[>($0, 1)]) HiveProject(c_int=[$0]) HiveSortLimit(fetch=[1]) HiveProject(c_int=[$2]) - HiveFilter(condition=[>($3, 1.0E0)]) + HiveFilter(condition=[>($3, 1E0)]) HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) PREHOOK: query: select c_int from (select c_int from cbo_t1 where c_float > 1.0 limit 1) subq where c_int > 1 order by c_int @@ -205,7 +205,7 @@ HiveSortLimit(fetch=[0]) HiveProject(_o__c0=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject($f0=[true]) - HiveFilter(condition=[>($3, 1.0E0)]) + HiveFilter(condition=[>($3, 1E0)]) HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) PREHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out index 244e24aba4..0cbb068313 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out @@ -60,10 +60,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((((value = 'val_400') or (value = 'val_500')) and ((key = 400) or (key = 450))) or (((value = 'val_100') or (value = 'val_200') or (value = 'val_300')) and ((key = 100) or (key = 150) or (key = 200)))) (type: boolean) + filterExpr: (((value) IN ('val_400', 'val_500') and (key) IN (400, 450)) or ((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200))) (type: boolean) Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((value = 'val_400') or (value = 'val_500')) and ((key = 400) or (key = 450))) or (((value = 'val_100') or (value = 'val_200') or (value = 'val_300')) and ((key = 100) or (key = 150) or (key = 200)))) (type: boolean) + predicate: (((value) IN ('val_400', 'val_500') and (key) IN (400, 450)) or ((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -72,13 +72,12 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((VALUE._col0 = 'val_100') or (VALUE._col0 = 'val_200') or (VALUE._col0 = 'val_300')) and ((KEY._col0 = 100) or (KEY._col0 = 150) or (KEY._col0 = 200))) (type: boolean) + predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -99,7 +98,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1_n1 Filter Operator - predicate: (((VALUE._col0 = 'val_400') or (VALUE._col0 = 'val_500')) and ((KEY._col0 = 400) or (KEY._col0 = 450))) (type: boolean) + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -428,7 +427,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((value = 'val_400') or (value = 'val_500')) and ((key = 400) or (key = 450))) or (((value = 'val_100') or (value = 'val_200') or (value = 'val_300')) and ((key = 100) or (key = 150) or (key = 200)))) (type: boolean) + predicate: (((value) IN ('val_400', 'val_500') and (key) IN (400, 450)) or ((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -437,13 +436,12 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((VALUE._col0 = 'val_100') or (VALUE._col0 = 'val_200') or (VALUE._col0 = 'val_300')) and ((KEY._col0 = 100) or (KEY._col0 = 150) or (KEY._col0 = 200))) (type: boolean) + predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -464,7 +462,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1_n1 Filter Operator - predicate: (((VALUE._col0 = 
'val_400') or (VALUE._col0 = 'val_500')) and ((KEY._col0 = 400) or (KEY._col0 = 450))) (type: boolean) + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out b/ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out index 8e9d3b493f..a2fcf77991 100644 --- a/ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out +++ b/ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out @@ -35,10 +35,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((value = 'val_105') and (key = '105')) (type: boolean) + filterExpr: ((key = '105') and (value = 'val_105')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = 'val_105') and (key = '105')) (type: boolean) + predicate: ((key = '105') and (value = 'val_105')) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out index f799581f7b..e2ad67b741 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out @@ -67,7 +67,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + filterExpr: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 593563 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -75,24 +75,24 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimal64ColEqualDecimal64Scalar(col 15:decimal(11,4)/DECIMAL_64, val 97632155639)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(11,4)/DECIMAL_64), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) - predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 9557 Data size: 461644 Basic stats: COMPLETE Column stats: NONE + predicateExpression: 
FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) + Statistics: Num rows: 7509 Data size: 362716 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] - selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 9557 Data size: 461644 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 8, 0, 10, 6, 15, 17, 20, 21, 23, 24, 25, 27, 30, 32] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 19:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 1:smallint) -> 18:double) -> 19:double) -> 20:double, 
DoubleColUnaryMinus(col 5:double) -> 21:double, DoubleColModuloDoubleColumn(col 22:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 22:double) -> 23:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 24:smallint, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 3:bigint, col 26:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 26:smallint) -> 27:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 29:double)(children: DoubleColAddDoubleColumn(col 5:double, col 28:double)(children: CastLongToDouble(col 1:smallint) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleColUnaryMinus(col 5:double) -> 31:double) -> 32:double + Statistics: Num rows: 7509 Data size: 362716 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 9557 Data size: 461644 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7509 Data size: 362716 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index 92325145b9..378ed6ec05 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -38,10 +38,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -58,10 +58,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -312,10 +312,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) 
(type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -332,10 +332,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_join.q.out index 985fb82108..f39023f249 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join.q.out @@ -35,10 +35,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -55,10 +55,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -575,10 +575,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -595,10 +595,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < 
'400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out index 759fd869bd..8df556c4b2 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -42,10 +42,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + filterExpr: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + predicate: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -63,10 +63,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and value is not null and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and value is not null and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1'))) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -84,10 +84,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) + filterExpr: (value is not null and (sqrt(key) <> 13.0D) and (key <> '306')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) + predicate: (value is not null and (sqrt(key) <> 13.0D) and (key <> '306')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) @@ -1736,10 +1736,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + filterExpr: ((key < '400') and (key <> '305') and 
(key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + predicate: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -1757,10 +1757,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and value is not null and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and value is not null and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1'))) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -1778,10 +1778,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) + filterExpr: (value is not null and (sqrt(key) <> 13.0D) and (key <> '306')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) + predicate: (value is not null and (sqrt(key) <> 13.0D) and (key <> '306')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out index 2ccd76a563..82fd00b50e 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -41,10 +41,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -61,10 +61,10 @@ STAGE PLANS: Map Operator Tree: 
TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -81,10 +81,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -1775,10 +1775,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -1795,10 +1795,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 
'val_500') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -1815,10 +1815,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out index a78553de23..732b46f18d 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out @@ -36,7 +36,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -350,7 +350,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -664,7 +664,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -976,7 +976,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/subquery_views.q.out b/ql/src/test/results/clientpositive/spark/subquery_views.q.out index 2d4aae2759..a9e62e4740 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_views.q.out @@ -168,12 +168,12 @@ STAGE PLANS: Map 
Operator Tree: TableScan alias: a - filterExpr: ((key < '11') and (value > 'val_11')) (type: boolean) + filterExpr: ((value > 'val_11') and (key < '11')) (type: boolean) properties: insideView TRUE Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '11') and (value > 'val_11')) (type: boolean) + predicate: ((value > 'val_11') and (key < '11')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(key) @@ -194,12 +194,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: ((key < '11') and (value > 'val_11')) (type: boolean) + filterExpr: ((value > 'val_11') and (key < '11')) (type: boolean) properties: insideView TRUE Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '11') and (value > 'val_11')) (type: boolean) + predicate: ((value > 'val_11') and (key < '11')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 36b1d81d4c..a31154987f 100644 --- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -264,7 +264,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - filterExpr: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + filterExpr: ((cdecimal1) IN (2365.8945945946, -3367.6517567568) or (cdecimal1) IN (881.0135135135)) (type: boolean) Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -272,8 +272,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterDecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) - predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterDecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, -3367.6517567568]), FilterDecimalColumnInList(col 1:decimal(20,10), values [881.0135135135])) + predicate: ((cdecimal1) IN (2365.8945945946, -3367.6517567568) or (cdecimal1) IN (881.0135135135)) (type: boolean) Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdecimal1 (type: decimal(20,10)) @@ -1281,13 +1281,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + expressions: ((cdecimal1) IN (2365.8945945946, -3367.6517567568) or (cdecimal1) IN (881.0135135135)) (type: boolean) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean + projectedOutputColumnNums: [7] + selectExpressions: ColOrCol(col 5:boolean, col 6:boolean)(children: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, -3367.6517567568]) -> 5:boolean, DecimalColumnInList(col 
1:decimal(20,10), values [881.0135135135]) -> 6:boolean) -> 7:boolean Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1295,7 +1295,7 @@ STAGE PLANS: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:boolean + keyExpressions: col 7:boolean native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] diff --git a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out index e04c5aede6..a6c583e015 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out @@ -67,7 +67,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + filterExpr: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -76,24 +76,24 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimal64ColEqualDecimal64Scalar(col 15:decimal(11,4)/DECIMAL_64, val 97632155639)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(11,4)/DECIMAL_64), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) - predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 9557 Data size: 2261694 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) + Statistics: Num rows: 7509 Data size: 1777028 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), 
ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] - selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 9557 Data size: 2261694 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 8, 0, 10, 6, 15, 17, 20, 21, 23, 24, 25, 27, 30, 32] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 19:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 1:smallint) -> 18:double) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, DoubleColModuloDoubleColumn(col 22:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 22:double) -> 23:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 24:smallint, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 3:bigint, col 26:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 26:smallint) -> 27:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 29:double)(children: DoubleColAddDoubleColumn(col 5:double, col 28:double)(children: CastLongToDouble(col 1:smallint) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleColUnaryMinus(col 5:double) -> 31:double) -> 32:double + Statistics: Num rows: 7509 Data size: 1777028 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 9557 Data size: 2261694 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7509 Data size: 1777028 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -113,7 +113,7 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, decimal(11,4)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double] + scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double] Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 0c44a1847c..bbd18a7ce6 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -1656,7 +1656,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) + filterExpr: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -1664,9 +1664,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 3:bigint) -> 13:float), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 14:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 14:decimal(7,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 15:double), FilterStringGroupColGreaterEqualStringScalar(col 6:string, val ss), FilterDoubleColNotEqualDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 16:double)), FilterLongColEqualLongScalar(col 0:int, val -89010)(children: col 0:tinyint), FilterExprAndExpr(children: FilterDecimal64ColGreaterDecimal64Scalar(col 17:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 17:decimal(7,2)/DECIMAL_64), FilterStringColLikeStringScalar(col 7:string, pattern ss))) - 
predicate: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) - Statistics: Num rows: 10922 Data size: 2584725 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 3:bigint) -> 13:float), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 14:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 14:decimal(7,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 15:double), FilterStringGroupColGreaterEqualStringScalar(col 6:string, val ss), FilterDoubleColNotEqualDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 16:double)), FilterExprAndExpr(children: FilterDecimal64ColGreaterDecimal64Scalar(col 17:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 17:decimal(7,2)/DECIMAL_64), FilterStringColLikeStringScalar(col 7:string, pattern ss))) + predicate: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) + Statistics: Num rows: 4778 Data size: 1130728 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 @@ -1675,7 +1675,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 6, 11, 9, 5, 4, 3, 1, 10, 18, 19, 20, 21, 23, 24, 25, 28, 31, 33, 35, 36, 38] selectExpressions: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 18:int, LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 19:bigint, LongColUnaryMinus(col 3:bigint) -> 20:bigint, DoubleColUnaryMinus(col 4:float) -> 21:float, LongColAddLongColumn(col 22:bigint, col 3:bigint)(children: LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 22:bigint) -> 23:bigint, DoubleColDivideDoubleColumn(col 5:double, col 5:double) -> 24:double, DoubleColUnaryMinus(col 5:double) -> 25:double, 
LongColMultiplyLongColumn(col 26:bigint, col 27:bigint)(children: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 26:int, LongColUnaryMinus(col 3:bigint) -> 27:bigint) -> 28:bigint, DoubleColAddDoubleColumn(col 29:double, col 30:double)(children: DoubleColUnaryMinus(col 5:double) -> 29:double, CastLongToDouble(col 3:bigint) -> 30:double) -> 31:double, DecimalScalarDivideDecimalColumn(val -1.389, col 32:decimal(3,0))(children: CastLongToDecimal(col 0:tinyint) -> 32:decimal(3,0)) -> 33:decimal(8,7), DoubleColModuloDoubleColumn(col 34:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 34:double) -> 35:double, LongColUnaryMinus(col 1:smallint) -> 36:smallint, LongColAddLongColumn(col 1:int, col 37:int)(children: col 1:smallint, LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 37:int) -> 38:int - Statistics: Num rows: 10922 Data size: 2584725 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1130728 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) null sort order: zzzzzzzzzzzzzzzzzzzzz @@ -1684,7 +1684,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 10922 Data size: 2584725 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1130728 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized @@ -1713,7 +1713,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6, 1, 21, 2, 5, 3, 4, 7, 0, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 10922 Data size: 2584725 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4778 Data size: 1130728 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 75 Limit Vectorization: @@ -1964,7 +1964,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (UDFToInteger(csmallint) < -6432)) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) + filterExpr: (((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (csmallint < -6432S)) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -1972,8 +1972,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: 
FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5:double, col 4:double)(children: col 4:float), FilterStringGroupColLessEqualStringScalar(col 7:string, val a)), FilterExprAndExpr(children: FilterDecimal64ColLessEqualDecimal64Scalar(col 13:decimal(13,3)/DECIMAL_64, val -1389)(children: CastLongToDecimal64(col 2:int) -> 13:decimal(13,3)/DECIMAL_64), FilterLongColLessLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), FilterLongColLessLongScalar(col 1:int, val -6432)(children: col 1:smallint)), FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern ss%), FilterDecimalColLessDecimalScalar(col 14:decimal(22,3), val 10.175)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)))) - predicate: (((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (UDFToInteger(csmallint) < -6432)) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimal64ColLessEqualDecimal64Scalar(col 13:decimal(13,3)/DECIMAL_64, val -1389)(children: CastLongToDecimal64(col 2:int) -> 13:decimal(13,3)/DECIMAL_64), FilterLongColLessLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), FilterLongColLessLongScalar(col 1:smallint, val -6432)), FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5:double, col 4:double)(children: col 4:float), FilterStringGroupColLessEqualStringScalar(col 7:string, val a)), FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern ss%), FilterDecimalColLessDecimalScalar(col 14:decimal(22,3), val 10.175)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)))) + predicate: (((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (csmallint < -6432S)) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) Statistics: Num rows: 3868 Data size: 915374 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0D) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175D) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) @@ -2214,7 +2214,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((UDFToInteger(ctinyint) <= cint) and (UDFToInteger(csmallint) >= -257) and (UDFToDouble(cint) >= cdouble)) (type: boolean) + filterExpr: ((csmallint >= -257S) and (UDFToInteger(ctinyint) <= cint) and (UDFToDouble(cint) >= cdouble)) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -2222,8 +2222,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterLongColGreaterEqualLongScalar(col 1:int, val -257)(children: col 
1:smallint), FilterDoubleColGreaterEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double)) - predicate: ((UDFToInteger(ctinyint) <= cint) and (UDFToInteger(csmallint) >= -257) and (UDFToDouble(cint) >= cdouble)) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1:smallint, val -257), FilterLongColLessEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double)) + predicate: ((csmallint >= -257S) and (UDFToInteger(ctinyint) <= cint) and (UDFToDouble(cint) >= cdouble)) (type: boolean) Statistics: Num rows: 455 Data size: 107677 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) @@ -2497,7 +2497,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (CAST( ctinyint AS decimal(6,2)) = 2563.58) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) + filterExpr: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -2505,22 +2505,22 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 2563.58), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3:bigint, col 2:bigint)(children: col 2:int), FilterLongColLessLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterDoubleColLessDoubleScalar(col 4:float, val -5638.14990234375)), FilterDecimal64ColEqualDecimal64Scalar(col 13:decimal(6,2)/DECIMAL_64, val 256358)(children: CastLongToDecimal64(col 0:tinyint) -> 13:decimal(6,2)/DECIMAL_64), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 14:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(21,2), val -5638.15)(children: CastLongToDecimal(col 3:bigint) -> 15:decimal(21,2))))) - predicate: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (CAST( ctinyint AS decimal(6,2)) = 2563.58) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) - Statistics: Num rows: 2654 Data size: 628077 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 2563.58), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3:bigint, col 2:bigint)(children: col 2:int), FilterLongColLessLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterDoubleColLessDoubleScalar(col 4:float, val 
-5638.14990234375)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 3:bigint) -> 13:double), FilterDecimalColLessDecimalScalar(col 14:decimal(21,2), val -5638.15)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(21,2))))) + predicate: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) + Statistics: Num rows: 606 Data size: 143411 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), cfloat (type: float), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 4, 16] - selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 16:double - Statistics: Num rows: 2654 Data size: 628077 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 4, 15] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 15:double + Statistics: Num rows: 606 Data size: 143411 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), sum(_col0), count(_col0), count(_col1), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double + aggregators: VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double @@ -2531,7 +2531,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2654 Data size: 628077 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 143411 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z @@ -2541,7 +2541,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2654 Data size: 628077 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 606 Data size: 143411 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double) Execution mode: vectorized Map Vectorization: @@ -2575,7 +2575,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1327 Data size: 314038 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 71705 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double), (2563.58D * ((_col1 - ((_col2 * _col2) 
/ _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), _col4 (type: bigint), ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D) (type: double), ((- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) * ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D)) (type: double), _col5 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (_col0 - (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END))) (type: double), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (_col0 + ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (_col0 * 762.0D) (type: double), _col2 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2584,7 +2584,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 12, 20, 28, 4, 37, 55, 5, 59, 68, 73, 81, 82, 2, 84] selectExpressions: DoubleColDivideLongColumn(col 8:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 7:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 7:double) -> 8:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 12:double, DoubleScalarMultiplyDoubleColumn(val 2563.58, col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 13:double) -> 14:double) -> 15:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 27:double)(children: DoubleColDivideLongColumn(col 23:double, col 26:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 22:double)(children: DoubleColDivideLongColumn(col 21:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 21:double) -> 22:double) -> 23:double, IfExprNullCondExpr(col 24:boolean, null, col 25:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 24:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 25:bigint) -> 26:bigint) -> 27:double) -> 28:double, DoubleColAddDoubleScalar(col 36:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 35:double)(children: DoubleColDivideLongColumn(col 31:double, col 34:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 29:double) -> 30:double) -> 31:double, 
IfExprNullCondExpr(col 32:boolean, null, col 33:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 32:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 33:bigint) -> 34:bigint) -> 35:double) -> 36:double) -> 37:double, DoubleColMultiplyDoubleColumn(col 45:double, col 54:double)(children: DoubleColUnaryMinus(col 44:double)(children: DoubleColDivideLongColumn(col 40:double, col 43:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 38:double) -> 39:double) -> 40:double, IfExprNullCondExpr(col 41:boolean, null, col 42:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 41:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 42:bigint) -> 43:bigint) -> 44:double) -> 45:double, DoubleColAddDoubleScalar(col 53:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 52:double)(children: DoubleColDivideLongColumn(col 48:double, col 51:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 47:double)(children: DoubleColDivideLongColumn(col 46:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 46:double) -> 47:double) -> 48:double, IfExprNullCondExpr(col 49:boolean, null, col 50:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 49:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 50:bigint) -> 51:bigint) -> 52:double) -> 53:double) -> 54:double) -> 55:double, DoubleColDivideLongColumn(col 58:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 57:double)(children: DoubleColDivideLongColumn(col 56:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 56:double) -> 57:double) -> 58:double) -> 59:double, DoubleColSubtractDoubleColumn(col 0:double, col 67:double)(children: DoubleColUnaryMinus(col 66:double)(children: DoubleColDivideLongColumn(col 62:double, col 65:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 61:double)(children: DoubleColDivideLongColumn(col 60:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 60:double) -> 61:double) -> 62:double, IfExprNullCondExpr(col 63:boolean, null, col 64:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 63:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 64:bigint) -> 65:bigint) -> 66:double) -> 67:double) -> 68:double, FuncPowerDoubleToDouble(col 72:double)(children: DoubleColDivideLongColumn(col 71:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 70:double)(children: DoubleColDivideLongColumn(col 69:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 69:double) -> 70:double) -> 71:double) -> 72:double) -> 73:double, DoubleColAddDoubleColumn(col 0:double, col 80:double)(children: DoubleColDivideLongColumn(col 76:double, col 79:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 75:double)(children: DoubleColDivideLongColumn(col 74:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 74:double) -> 75:double) -> 76:double, IfExprNullCondExpr(col 77:boolean, null, col 78:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 77:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 78:bigint) -> 79:bigint) -> 80:double) -> 81:double, DoubleColMultiplyDoubleScalar(col 
0:double, val 762.0) -> 82:double, DoubleScalarModuloDoubleColumn(val -863.257, col 83:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 83:double) -> 84:double - Statistics: Num rows: 1327 Data size: 314038 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 71705 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z @@ -2593,7 +2593,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1327 Data size: 314038 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 71705 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized @@ -2611,13 +2611,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] - Statistics: Num rows: 1327 Data size: 314038 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 71705 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1327 Data size: 314038 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 303 Data size: 71705 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/stat_estimate_related_col.q.out b/ql/src/test/results/clientpositive/stat_estimate_related_col.q.out index 8546612583..a4f2fe472c 100644 --- a/ql/src/test/results/clientpositive/stat_estimate_related_col.q.out +++ b/ql/src/test/results/clientpositive/stat_estimate_related_col.q.out @@ -313,10 +313,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - filterExpr: (((b = 2) or (b = 3)) and (b = 2)) (type: boolean) + filterExpr: ((b) IN (2, 3) and (b = 2)) (type: boolean) Statistics: Num rows: 5/5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((b = 2) or (b = 3)) and (b = 2)) (type: boolean) + predicate: ((b) IN (2, 3) and (b = 2)) (type: boolean) Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) @@ -388,15 +388,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t8 - filterExpr: (((b = 2) or (b = 3)) and (b = 2)) (type: boolean) + filterExpr: ((b) IN (2, 3) and (b = 2)) (type: boolean) Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((b = 2) or (b = 3)) and (b = 2)) (type: boolean) - Statistics: Num rows: 3/8 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((b) IN (2, 3) and (b = 2)) (type: boolean) + Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE 
Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: a - Statistics: Num rows: 3/8 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: 2 (type: int) diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out index 49c4b564e7..18999c5cd9 100644 --- a/ql/src/test/results/clientpositive/structin.q.out +++ b/ql/src/test/results/clientpositive/structin.q.out @@ -96,10 +96,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t11_n1 - filterExpr: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695',1)) (type: boolean) + filterExpr: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695','1')) (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695',1)) (type: boolean) + predicate: (struct(id,lineid)) IN (const struct('1234-1111-0074578664','3'), const struct('1234-1111-0074578695','1')) (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: string), lineid (type: string) @@ -112,6 +112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/temp_table_alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/temp_table_alter_partition_coltype.q.out index 740a270169..65115f3547 100644 --- a/ql/src/test/results/clientpositive/temp_table_alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/temp_table_alter_partition_coltype.q.out @@ -344,7 +344,7 @@ POSTHOOK: Input: default@alter_coltype_temp@dt=100/ts=6.30 #### A masked pattern was here #### OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alter_coltype_temp` -WHERE `ts` = 3.0 AND `dt` = 100 +WHERE `ts` = 3 AND `dt` = 100 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out b/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out index 2feb6dd738..784f4dc5e6 100644 --- a/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out +++ b/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out @@ -105,10 +105,10 @@ STAGE PLANS: Processor Tree: TableScan alias: src - filterExpr: (to_unix_timestamp(key) > 10L) (type: boolean) + filterExpr: (unix_timestamp(key) > 10L) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (to_unix_timestamp(key) > 10L) (type: boolean) + predicate: (unix_timestamp(key) > 10L) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/vector_case_when_2.q.out index a7b46fd8aa..f38ed9eb5b 100644 --- 
a/ql/src/test/results/clientpositive/vector_case_when_2.q.out +++ b/ql/src/test/results/clientpositive/vector_case_when_2.q.out @@ -998,13 +998,13 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:q548284:int, 1:ROW__ID:struct] Select Operator - expressions: q548284 (type: int), CASE WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END (type: decimal(2,1)) + expressions: q548284 (type: int), CAST( CASE WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END AS decimal(11,1)) (type: decimal(11,1)) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 8] - selectExpressions: IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(2,1)col 7:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, ConstantVectorExpression(val 0.8) -> 3:decimal(2,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(2,1)col 6:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(2,1), ConstantVectorExpression(val 8) -> 6:decimal(2,1)) -> 7:decimal(2,1)) -> 8:decimal(2,1) + projectedOutputColumnNums: [0, 9] + selectExpressions: CastDecimalToDecimal(col 8:decimal(2,1))(children: IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(2,1)col 7:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, ConstantVectorExpression(val 0.8) -> 3:decimal(2,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(2,1)col 6:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(2,1), ConstantVectorExpression(val 8) -> 6:decimal(2,1)) -> 7:decimal(2,1)) -> 8:decimal(2,1)) -> 9:decimal(11,1) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -1017,7 +1017,7 @@ STAGE PLANS: nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: decimal(2,1)) + value expressions: _col1 (type: decimal(11,1)) Execution mode: vectorized Map Vectorization: enabled: true @@ -1033,14 +1033,14 @@ STAGE PLANS: includeColumns: [0] dataColumns: q548284:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(2,1), bigint, decimal(2,1), decimal(2,1), decimal(2,1), decimal(2,1)] + scratchColumnTypeNames: [bigint, decimal(2,1), bigint, decimal(2,1), decimal(2,1), decimal(2,1), decimal(2,1), decimal(11,1)] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(2,1)) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(11,1)) outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/vector_const.q.out b/ql/src/test/results/clientpositive/vector_const.q.out index e114a44c72..25870ae430 100644 --- a/ql/src/test/results/clientpositive/vector_const.q.out +++ b/ql/src/test/results/clientpositive/vector_const.q.out @@ -37,7 +37,7 @@ STAGE PLANS: alias: varchar_const_1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE 
           Select Operator
-            expressions: 'FF' (type: varchar(4))
+            expressions: 'FF' (type: varchar(3))
             outputColumnNames: _col0
             Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
diff --git a/ql/src/test/results/clientpositive/vectorization_10.q.out b/ql/src/test/results/clientpositive/vectorization_10.q.out
index 9383bf7130..a4b8687325 100644
--- a/ql/src/test/results/clientpositive/vectorization_10.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_10.q.out
@@ -64,7 +64,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: alltypesorc
-            filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean)
+            filterExpr: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean)
             Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE
             TableScan Vectorization:
                 native: true
@@ -73,24 +73,24 @@ STAGE PLANS:
              Filter Vectorization:
                  className: VectorFilterOperator
                  native: true
-                 predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimal64ColEqualDecimal64Scalar(col 15:decimal(11,4)/DECIMAL_64, val 97632155639)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(11,4)/DECIMAL_64), FilterStringColLikeStringScalar(col 6:string, pattern %a))))
-             predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean)
-             Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE
+                 predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterStringColLikeStringScalar(col 6:string, pattern %a)))
+             predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean)
+             Statistics: Num rows: 11590 Data size: 2232584 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33]
-                   selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double
-               Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE
+                   projectedOutputColumnNums: [5, 8, 0, 10, 6, 15, 17, 20, 21, 23, 24, 25, 27, 30, 32]
+                   selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 19:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 1:smallint) -> 18:double) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, DoubleColModuloDoubleColumn(col 22:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 22:double) -> 23:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 24:smallint, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 3:bigint, col 26:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 26:smallint) -> 27:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 29:double)(children: DoubleColAddDoubleColumn(col 5:double, col 28:double)(children: CastLongToDouble(col 1:smallint) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleColUnaryMinus(col 5:double) -> 31:double) -> 32:double
+               Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -110,7 +110,7 @@ STAGE PLANS:
               includeColumns: [0, 1, 3, 5, 6, 7, 8, 10]
               dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
               partitionColumnCount: 0
-              scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, decimal(11,4)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double]
+              scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double]
 
   Stage: Stage-0
     Fetch Operator