diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index dc3781a3ce..75bffe3333 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -1361,6 +1361,13 @@ public static boolean isOpPositive(ExprNodeDesc desc) { return GenericUDFOPPositive.class == getGenericUDFClassFromExprDesc(desc); } + /** + * Returns whether the exprNodeDesc is a node of "negative". + */ + public static boolean isOpNegative(ExprNodeDesc desc) { + return GenericUDFOPNegative.class == getGenericUDFClassFromExprDesc(desc); + } + /** * Returns whether the exprNodeDesc is node of "cast". */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java index 8e74f8a8b8..8bd098b3d7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/RowSchema.java @@ -65,16 +65,15 @@ public ColumnInfo getColumnInfo(String tabAlias, String alias) { for (ColumnInfo columnInfo: this.signature) { if (columnInfo.getTabAlias() == null) { if (tabAlias == null) { - if(columnInfo.getAlias() != null && alias != null && + if(columnInfo.getAlias() != null && columnInfo.getAlias().equals(alias)) { return columnInfo; } } - } - else { + } else { if (tabAlias != null) { if (columnInfo.getTabAlias().equals(tabAlias) && - columnInfo.getAlias() != null && alias != null && + columnInfo.getAlias() != null && columnInfo.getAlias().equals(alias)) { return columnInfo; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index 3346f416e9..2d3daeb771 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -235,7 +235,6 @@ private static ExprNodeConstantDesc typeCast(ExprNodeDesc desc, TypeInfo ti, boo } public static ExprNodeDesc foldExpr(ExprNodeGenericFuncDesc funcDesc) { - GenericUDF udf = funcDesc.getGenericUDF(); if (!isConstantFoldableUdf(udf, funcDesc.getChildren())) { return funcDesc; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java index e04a2b1b97..8f5f4e9237 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java @@ -111,7 +111,7 @@ protected abstract void generatePredicate(NodeProcessorCtx procCtx, FilterOperat * @throws UDFArgumentException */ protected void addPruningPred(Map opToPrunner, - TableScanOperator top, ExprNodeDesc new_pruner_pred) throws UDFArgumentException { + TableScanOperator top, ExprNodeDesc new_pruner_pred) throws SemanticException { ExprNodeDesc old_pruner_pred = opToPrunner.get(top); ExprNodeDesc pruner_pred = null; if (old_pruner_pred != null) { @@ -139,7 +139,7 @@ protected void addPruningPred(Map opToPrunner, */ protected void addPruningPred(Map> opToPrunner, TableScanOperator top, ExprNodeDesc new_pruner_pred, Partition part) - throws UDFArgumentException { + throws SemanticException { Map oldPartToPruner = opToPrunner.get(top); Map partToPruner = null; ExprNodeDesc pruner_pred = null; diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 4a18cfef54..15137b2541 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -53,6 +53,7 @@ import org.apache.calcite.rex.RexRangeRef; import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexUtil.FixNullabilityShuttle; import org.apache.calcite.rex.RexVisitor; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlAggFunction; @@ -98,9 +99,6 @@ public class HiveCalciteUtil { - private static final Logger LOG = LoggerFactory.getLogger(HiveCalciteUtil.class); - - /** * Get list of virtual columns from the given list of projections. *

@@ -1062,6 +1060,25 @@ public RexNode apply(RelDataTypeField input) { return HiveProject.create(input, copyInputRefs, null); } + public static boolean isConstant(RexNode expr) { + if (expr instanceof RexCall) { + RexCall call = (RexCall) expr; + if (call.getOperator() == SqlStdOperatorTable.ROW || + call.getOperator() == SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR || + call.getOperator() == SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR) { + // We check all operands + for (RexNode node : ((RexCall) expr).getOperands()) { + if (!isConstant(node)) { + return false; + } + } + // All literals + return true; + } + } + return expr.isA(SqlKind.LITERAL); + } + /** * Walks over an expression and determines whether it is constant. */ @@ -1157,4 +1174,14 @@ public Void visitInputRef(RexInputRef inputRef) { return inputRefSet; } } + + /** Fixes up the type of all {@link RexInputRef}s in an + * expression to match differences in nullability. + * + *

Throws if there are any greater inconsistencies of type. */ + public static RexNode fixUp(final RexBuilder rexBuilder, + RexNode node, final List fieldTypes) { + return new FixNullabilityShuttle(rexBuilder, fieldTypes).apply(node); + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java index 1dede0f88a..08b4e8db4b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java @@ -15,44 +15,43 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.hive.ql.optimizer.calcite; import java.util.HashSet; import java.util.List; -import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexExecutorImpl; import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - +/** + * Executor for {@link RexNode} based on Hive semantics. + */ public class HiveRexExecutorImpl extends RexExecutorImpl { private static final Logger LOG = LoggerFactory.getLogger(HiveRexExecutorImpl.class); - private final RelOptCluster cluster; - public HiveRexExecutorImpl(RelOptCluster cluster) { + public HiveRexExecutorImpl() { super(null); - this.cluster = cluster; } @Override public void reduce(RexBuilder rexBuilder, List constExps, List reducedValues) { - RexNodeConverter rexNodeConverter = new RexNodeConverter(cluster); + RexNodeConverter rexNodeConverter = new RexNodeConverter(rexBuilder, rexBuilder.getTypeFactory()); for (RexNode rexNode : constExps) { // initialize the converter ExprNodeConverter converter = new ExprNodeConverter("", null, null, null, - new HashSet(), cluster.getTypeFactory()); + new HashSet<>(), rexBuilder.getTypeFactory()); // convert RexNode to ExprNodeGenericFuncDesc ExprNodeDesc expr = rexNode.accept(converter); if (expr instanceof ExprNodeGenericFuncDesc) { @@ -60,20 +59,27 @@ public void reduce(RexBuilder rexBuilder, List constExps, List ExprNodeDesc constant = ConstantPropagateProcFactory .foldExpr((ExprNodeGenericFuncDesc) expr); if (constant != null) { - try { - // convert constant back to RexNode - reducedValues.add(rexNodeConverter.convert(constant)); - } catch (Exception e) { - LOG.warn(e.getMessage()); - reducedValues.add(rexNode); - } + addExpressionToList(constant, rexNode, rexNodeConverter, reducedValues); } else { reducedValues.add(rexNode); } + } else if (expr instanceof ExprNodeConstantDesc) { + addExpressionToList(expr, rexNode, rexNodeConverter, reducedValues); } else { reducedValues.add(rexNode); } } } + private void addExpressionToList(ExprNodeDesc reducedExpr, RexNode originalExpr, + RexNodeConverter rexNodeConverter, List reducedValues) { + try { + // convert constant back to RexNode + reducedValues.add(rexNodeConverter.convert(reducedExpr)); + } catch (Exception e) { + LOG.warn(e.getMessage()); + reducedValues.add(originalExpr); + } + } + } diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java index 4a5c9cc567..5e301fdbcb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTypeSystemImpl.java @@ -78,6 +78,7 @@ public int getDefaultPrecision(SqlTypeName typeName) { // Binary doesn't need any sizes; Decimal has the default of 10. case BINARY: case VARBINARY: + return RelDataType.PRECISION_NOT_SPECIFIED; case TIME: case TIMESTAMP: case TIMESTAMP_WITH_LOCAL_TIME_ZONE: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRexExprList.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRexExprList.java new file mode 100644 index 0000000000..707bcca0b5 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveRexExprList.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBiVisitor; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexVisitor; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; + +/** + * This class represents the equivalent to {@link ExprNodeColumnListDesc} + * in a Calcite expression. It is not supposed to be used through planning + * and should be immediately expanded after it has been generated by + * the parser. 
+ */ +public class HiveRexExprList extends RexNode { + + final List expressions = new ArrayList<>(); + + public void addExpression(RexNode expression) { + expressions.add(expression); + } + + public List getExpressions() { + return expressions; + } + + @Override + public RelDataType getType() { + throw new UnsupportedOperationException(); + } + + @Override + public R accept(RexVisitor visitor) { + throw new UnsupportedOperationException(); + } + + @Override + public R accept(RexBiVisitor visitor, P arg) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof HiveRexExprList) { + return this.expressions.equals(((HiveRexExprList) obj).expressions); + } + return false; + } + + @Override + public int hashCode() { + return expressions.hashCode(); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index cf104af00b..c6de339fd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Set; +import org.apache.calcite.avatica.util.ByteString; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -58,7 +59,6 @@ import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.RexVisitor; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.Schema; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter.HiveNlsString; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order; @@ -73,6 +73,7 @@ import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory.HiveNlsString; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -227,160 +228,165 @@ public ExprNodeDesc visitCall(RexCall call) { @Override public ExprNodeDesc visitLiteral(RexLiteral literal) { + return toExprNodeConstantDesc(literal); + } + + public static ExprNodeConstantDesc toExprNodeConstantDesc(RexLiteral literal) { RelDataType lType = literal.getType(); if (RexLiteral.value(literal) == null) { switch (literal.getType().getSqlTypeName()) { - case BOOLEAN: - return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, null); - case TINYINT: - return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, null); - case SMALLINT: - return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, null); - case INTEGER: - return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, null); - case BIGINT: - return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, null); - case FLOAT: - case REAL: - return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, null); - case DOUBLE: - return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, null); - case DATE: 
- return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, null); - case TIME: - case TIMESTAMP: - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, null); - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - HiveConf conf; - try { - conf = Hive.get().getConf(); - } catch (HiveException e) { - throw new RuntimeException(e); - } - return new ExprNodeConstantDesc( - TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), null); - case BINARY: - return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, null); - case DECIMAL: - return new ExprNodeConstantDesc( - TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), lType.getScale()), null); - case VARCHAR: - case CHAR: - return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null); - case INTERVAL_YEAR: - case INTERVAL_MONTH: - case INTERVAL_YEAR_MONTH: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, null); - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, null); - default: - return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, null); + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, null); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, null); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, null); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, null); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, null); + case FLOAT: + case REAL: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, null); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, null); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, null); + case TIME: + case TIMESTAMP: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, null); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + HiveConf conf; + try { + conf = Hive.get().getConf(); + } catch (HiveException e) { + throw new RuntimeException(e); + } + return new ExprNodeConstantDesc( + TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), null); + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, null); + case DECIMAL: + return new ExprNodeConstantDesc( + TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), lType.getScale()), null); + case VARCHAR: + case CHAR: + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null); + case INTERVAL_YEAR: + case INTERVAL_MONTH: + case INTERVAL_YEAR_MONTH: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, null); + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, null); + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, null); } } else { switch (literal.getType().getSqlTypeName()) { - case BOOLEAN: - return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral - .booleanValue(literal))); - 
case TINYINT: - return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal - .getValue3()).byteValue())); - case SMALLINT: - return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, - Short.valueOf(((Number) literal.getValue3()).shortValue())); - case INTEGER: - return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, - Integer.valueOf(((Number) literal.getValue3()).intValue())); - case BIGINT: - return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal - .getValue3()).longValue())); - case FLOAT: - case REAL: - return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, - Float.valueOf(((Number) literal.getValue3()).floatValue())); - case DOUBLE: - return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, - Double.valueOf(((Number) literal.getValue3()).doubleValue())); - case DATE: - return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, - Date.valueOf(literal.getValueAs(DateString.class).toString())); - case TIME: - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, - Timestamp.valueOf(literal.getValueAs(TimeString.class).toString())); - case TIMESTAMP: - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, - Timestamp.valueOf(literal.getValueAs(TimestampString.class).toString())); - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - HiveConf conf; - try { - conf = Hive.get().getConf(); - } catch (HiveException e) { - throw new RuntimeException(e); - } - // Calcite stores timestamp with local time-zone in UTC internally, thus - // when we bring it back, we need to add the UTC suffix. - return new ExprNodeConstantDesc(TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), - TimestampTZUtil.parse(literal.getValueAs(TimestampString.class).toString() + " UTC")); - case BINARY: - return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); - case DECIMAL: - return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), - lType.getScale()), HiveDecimal.create((BigDecimal)literal.getValue3())); - case VARCHAR: - case CHAR: { - if (literal.getValue() instanceof HiveNlsString) { - HiveNlsString mxNlsString = (HiveNlsString) literal.getValue(); - switch (mxNlsString.interpretation) { - case STRING: - return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, literal.getValue3()); - case CHAR: { - int precision = lType.getPrecision(); - HiveChar value = new HiveChar((String) literal.getValue3(), precision); - return new ExprNodeConstantDesc(new CharTypeInfo(precision), value); - } - case VARCHAR: { - int precision = lType.getPrecision(); - HiveVarchar value = new HiveVarchar((String) literal.getValue3(), precision); - return new ExprNodeConstantDesc(new VarcharTypeInfo(precision), value); + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral + .booleanValue(literal))); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal + .getValue3()).byteValue())); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, + Short.valueOf(((Number) literal.getValue3()).shortValue())); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, + Integer.valueOf(((Number) literal.getValue3()).intValue())); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal + .getValue3()).longValue())); + case FLOAT: + case REAL: + return new 
ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, + Float.valueOf(((Number) literal.getValue3()).floatValue())); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, + Double.valueOf(((Number) literal.getValue3()).doubleValue())); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, + Date.valueOf(literal.getValueAs(DateString.class).toString())); + case TIME: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, + Timestamp.valueOf(literal.getValueAs(TimeString.class).toString())); + case TIMESTAMP: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, + Timestamp.valueOf(literal.getValueAs(TimestampString.class).toString())); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + HiveConf conf; + try { + conf = Hive.get().getConf(); + } catch (HiveException e) { + throw new RuntimeException(e); } + // Calcite stores timestamp with local time-zone in UTC internally, thus + // when we bring it back, we need to add the UTC suffix. + return new ExprNodeConstantDesc(TypeInfoFactory.getTimestampTZTypeInfo(conf.getLocalTimeZone()), + TimestampTZUtil.parse(literal.getValueAs(TimestampString.class).toString() + " UTC")); + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, + literal.getValueAs(ByteString.class).getBytes()); + case DECIMAL: + return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), + lType.getScale()), HiveDecimal.create((BigDecimal)literal.getValue3())); + case VARCHAR: + case CHAR: { + if (literal.getValue() instanceof HiveNlsString) { + HiveNlsString mxNlsString = (HiveNlsString) literal.getValue(); + switch (mxNlsString.interpretation) { + case STRING: + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, literal.getValue3()); + case CHAR: { + int precision = lType.getPrecision(); + HiveChar value = new HiveChar((String) literal.getValue3(), precision); + return new ExprNodeConstantDesc(new CharTypeInfo(precision), value); + } + case VARCHAR: { + int precision = lType.getPrecision(); + HiveVarchar value = new HiveVarchar((String) literal.getValue3(), precision); + return new ExprNodeConstantDesc(new VarcharTypeInfo(precision), value); + } + } } + throw new RuntimeException("varchar/string/char values must use HiveNlsString for correctness"); } - throw new RuntimeException("varchar/string/char values must use HiveNlsString for correctness"); - } - case INTERVAL_YEAR: - case INTERVAL_MONTH: - case INTERVAL_YEAR_MONTH: { - BigDecimal monthsBd = (BigDecimal) literal.getValue(); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, - new HiveIntervalYearMonth(monthsBd.intValue())); - } - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: { - BigDecimal millisBd = (BigDecimal) literal.getValue(); - // Calcite literal is in millis, we need to convert to seconds - BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000)); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - new HiveIntervalDayTime(secsBd)); - } - default: - return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); + case INTERVAL_YEAR: + case INTERVAL_MONTH: + case INTERVAL_YEAR_MONTH: { + BigDecimal monthsBd = (BigDecimal) literal.getValue(); + return new 
ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, + new HiveIntervalYearMonth(monthsBd.intValue())); + } + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: { + BigDecimal millisBd = (BigDecimal) literal.getValue(); + // Calcite literal is in millis, we need to convert to seconds + BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000)); + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + new HiveIntervalDayTime(secsBd)); + } + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index ba6eefb2d2..67ba55b2e6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -19,36 +19,27 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.avatica.util.TimeUnitRange; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlBinaryOperator; -import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlCastFunction; import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.fun.SqlQuantifyOperator; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; -import org.apache.calcite.util.ConversionUtil; import org.apache.calcite.util.DateString; -import org.apache.calcite.util.NlsString; import org.apache.calcite.util.TimestampString; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Decimal128; @@ -59,28 +50,21 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZ; -import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter.HiveNlsString.Interpretation; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory.HiveNlsString.Interpretation; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; -import org.apache.hadoop.hive.ql.plan.SubqueryType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -110,77 +94,27 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import java.math.BigDecimal; -import java.math.BigInteger; import java.time.Instant; import java.util.ArrayList; import java.util.Calendar; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; +/** + * Class that contains logic to translate Hive expressions ({@link ExprNodeDesc}) + * into Calcite expressions ({@link RexNode}). + */ public class RexNodeConverter { - private static class InputCtx { - private final RelDataType calciteInpDataType; - private final ImmutableMap hiveNameToPosMap; - private final RowResolver hiveRR; - private final int offsetInCalciteSchema; - - private InputCtx(RelDataType calciteInpDataType, ImmutableMap hiveNameToPosMap, - RowResolver hiveRR, int offsetInCalciteSchema) { - this.calciteInpDataType = calciteInpDataType; - this.hiveNameToPosMap = hiveNameToPosMap; - this.hiveRR = hiveRR; - this.offsetInCalciteSchema = offsetInCalciteSchema; - } - }; - - private final RelOptCluster cluster; - private final ImmutableList inputCtxs; - private final boolean flattenExpr; - - //outerRR belongs to outer query and is required to resolve correlated references - private final RowResolver outerRR; - private final ImmutableMap outerNameToPosMap; - private int correlatedId; - private final int maxNodesForInToOrTransformation; - - //Constructor used by HiveRexExecutorImpl - public RexNodeConverter(RelOptCluster cluster) { - this(cluster, new ArrayList(), false); - } + private final RexBuilder rexBuilder; + private final RelDataTypeFactory typeFactory; - //subqueries will need outer query's row resolver - public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, - ImmutableMap outerNameToPosMap, - ImmutableMap nameToPosMap, RowResolver hiveRR, RowResolver outerRR, - int maxNodesForInToOrTransformation, int offset, boolean flattenExpr, int correlatedId) { - this.cluster = cluster; - this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR, offset)); - this.flattenExpr = flattenExpr; - this.outerRR = outerRR; - this.outerNameToPosMap = outerNameToPosMap; - this.correlatedId = correlatedId; - this.maxNodesForInToOrTransformation = maxNodesForInToOrTransformation; - } - public 
RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, - ImmutableMap nameToPosMap, int offset, boolean flattenExpr) { - this.cluster = cluster; - this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset)); - this.flattenExpr = flattenExpr; - this.outerRR = null; - this.outerNameToPosMap = null; - this.maxNodesForInToOrTransformation = 0; - } - - public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean flattenExpr) { - this.cluster = cluster; - this.inputCtxs = ImmutableList. builder().addAll(inpCtxLst).build(); - this.flattenExpr = flattenExpr; - this.outerRR = null; - this.outerNameToPosMap = null; - this.maxNodesForInToOrTransformation = 0; + /** + * Constructor used by HiveRexExecutorImpl. + */ + public RexNodeConverter(RexBuilder rexBuilder, RelDataTypeFactory typeFactory) { + this.rexBuilder = rexBuilder; + this.typeFactory = typeFactory; } public RexNode convert(ExprNodeDesc expr) throws SemanticException { @@ -188,128 +122,19 @@ public RexNode convert(ExprNodeDesc expr) throws SemanticException { return convert((ExprNodeGenericFuncDesc) expr); } else if (expr instanceof ExprNodeConstantDesc) { return convert((ExprNodeConstantDesc) expr); - } else if (expr instanceof ExprNodeColumnDesc) { - return convert((ExprNodeColumnDesc) expr); } else if (expr instanceof ExprNodeFieldDesc) { return convert((ExprNodeFieldDesc) expr); - } else if(expr instanceof ExprNodeSubQueryDesc) { - return convert((ExprNodeSubQueryDesc) expr); } else { throw new RuntimeException("Unsupported Expression"); } - // TODO: handle ExprNodeColumnListDesc - } - - private RexNode getSomeSubquery(final RelNode subqueryRel, final RexNode lhs, - final SqlQuantifyOperator quantifyOperator) { - if(quantifyOperator == SqlStdOperatorTable.SOME_EQ) { - return RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); - } else if (quantifyOperator == SqlStdOperatorTable.SOME_NE) { - RexSubQuery subQuery = RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); - return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, subQuery); - } else { - return RexSubQuery.some(subqueryRel, ImmutableList.of(lhs), quantifyOperator); - } - } - - private void throwInvalidSubqueryError(final ASTNode comparisonOp) throws SemanticException { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "Invalid operator:" + comparisonOp.toString())); - } - - // <>ANY and =ALL is not supported - private RexNode convertSubquerySomeAll(final ExprNodeSubQueryDesc subQueryDesc) - throws SemanticException { - assert(subQueryDesc.getType() == SubqueryType.SOME - || subQueryDesc.getType() == SubqueryType.ALL); - - RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); - ASTNode comparisonOp = subQueryDesc.getComparisonOp(); - SqlQuantifyOperator quantifyOperator = null; - - switch (comparisonOp.getType()) { - case HiveParser.EQUAL: - if(subQueryDesc.getType() == SubqueryType.ALL) { - throwInvalidSubqueryError(comparisonOp); - } - quantifyOperator = SqlStdOperatorTable.SOME_EQ; - break; - case HiveParser.LESSTHAN: - quantifyOperator = SqlStdOperatorTable.SOME_LT; - break; - case HiveParser.LESSTHANOREQUALTO: - quantifyOperator = SqlStdOperatorTable.SOME_LE; - break; - case HiveParser.GREATERTHAN: - quantifyOperator = SqlStdOperatorTable.SOME_GT; - break; - case HiveParser.GREATERTHANOREQUALTO: - quantifyOperator = SqlStdOperatorTable.SOME_GE; - break; - case HiveParser.NOTEQUAL: - if(subQueryDesc.getType() == SubqueryType.SOME) { - 
throwInvalidSubqueryError(comparisonOp); - } - quantifyOperator = SqlStdOperatorTable.SOME_NE; - break; - default: - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "Invalid operator:" + comparisonOp.toString())); - } - - if(subQueryDesc.getType() == SubqueryType.ALL) { - quantifyOperator = SqlStdOperatorTable.some(quantifyOperator.comparisonKind.negateNullSafe()); - } - RexNode someQuery = getSomeSubquery(subQueryDesc.getRexSubQuery(), rexNodeLhs, - quantifyOperator); - if(subQueryDesc.getType() == SubqueryType.ALL) { - return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, someQuery); - } - return someQuery; - } - - private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException { - if(subQueryDesc.getType() == SubqueryType.IN) { - /* - * Check.5.h :: For In and Not In the SubQuery must implicitly or - * explicitly only contain one select item. - */ - if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "SubQuery can contain only 1 item in Select List.")); - } - //create RexNode for LHS - RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); - - //create RexSubQuery node - RexNode rexSubQuery = RexSubQuery.in(subQueryDesc.getRexSubQuery(), - ImmutableList.of(rexNodeLhs)); - return rexSubQuery; - } else if(subQueryDesc.getType() == SubqueryType.EXISTS) { - RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery()); - return subQueryNode; - } else if(subQueryDesc.getType() == SubqueryType.SCALAR){ - if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "SubQuery can contain only 1 item in Select List.")); - } - //create RexSubQuery node - RexNode rexSubQuery = RexSubQuery.scalar(subQueryDesc.getRexSubQuery()); - return rexSubQuery; - } else if(subQueryDesc.getType() == SubqueryType.SOME - || subQueryDesc.getType() == SubqueryType.ALL) { - return convertSubquerySomeAll(subQueryDesc); - } else { - throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "Invalid subquery: " + subQueryDesc.getType())); - } + // TODO: Handle ExprNodeColumnDesc, ExprNodeColumnListDesc } private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException { RexNode rexNode = convert(fieldDesc.getDesc()); if (rexNode.getType().isStruct()) { // regular case of accessing nested field in a column - return cluster.getRexBuilder().makeFieldAccess(rexNode, fieldDesc.getFieldName(), true); + return rexBuilder.makeFieldAccess(rexNode, fieldDesc.getFieldName(), true); } else { // This may happen for schema-less tables, where columns are dynamically // supplied by serdes. @@ -399,32 +224,31 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { isAllPrimitive = isAllPrimitive && tmpExprNode.getTypeInfo().getCategory() == Category.PRIMITIVE; - argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory())); + argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), typeFactory)); tmpRN = convert(tmpExprNode); childRexNodeLst.add(tmpRN); } // See if this is an explicit cast. 
- RexNode expr = null; - RelDataType retType = null; - expr = handleExplicitCast(func, childRexNodeLst); + RelDataType retType = TypeConverter.convert(func.getTypeInfo(), typeFactory); + RexNode expr = handleExplicitCast(func.getGenericUDF(), retType, childRexNodeLst, + rexBuilder); if (expr == null) { // This is not a cast; process the function. - retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()); SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType); if (calciteOp.getKind() == SqlKind.CASE) { // If it is a case operator, we need to rewrite it - childRexNodeLst = rewriteCaseChildren(func, childRexNodeLst); + childRexNodeLst = rewriteCaseChildren(func.getFuncText(), childRexNodeLst, rexBuilder); // Adjust branch types by inserting explicit casts if the actual is ambigous - childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType); + childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder); } else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) { // If it is a extract operator, we need to rewrite it - childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst); + childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst, rexBuilder); } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) { // If it is a floor operator, we need to rewrite it - childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst); + childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst, rexBuilder); } else if (calciteOp.getKind() == SqlKind.IN && isAllPrimitive) { if (childRexNodeLst.size() == 2) { // if it is a single item in an IN clause, transform A IN (B) to A = B @@ -437,27 +261,19 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { // except complex types // Rewrite to OR is done only if number of operands are less than // the threshold configured - boolean rewriteToOr = true; - if(this.maxNodesForInToOrTransformation != 0) { - if(childRexNodeLst.size() > this.maxNodesForInToOrTransformation) { - rewriteToOr = false; - } - } - if(rewriteToOr) { - childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst); - calciteOp = SqlStdOperatorTable.OR; - } + childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst, rexBuilder); + calciteOp = SqlStdOperatorTable.OR; } } else if (calciteOp.getKind() == SqlKind.COALESCE && childRexNodeLst.size() > 1) { // Rewrite COALESCE as a CASE // This allows to be further reduced to OR, if possible calciteOp = SqlStdOperatorTable.CASE; - childRexNodeLst = rewriteCoalesceChildren(func, childRexNodeLst); + childRexNodeLst = rewriteCoalesceChildren(childRexNodeLst, rexBuilder); // Adjust branch types by inserting explicit casts if the actual is ambigous - childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType); + childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder); } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { - childRexNodeLst = rewriteToDateChildren(childRexNodeLst); + childRexNodeLst = rewriteToDateChildren(childRexNodeLst, rexBuilder); } else if (calciteOp.getKind() == SqlKind.BETWEEN) { assert childRexNodeLst.get(0).isAlwaysTrue() || childRexNodeLst.get(0).isAlwaysFalse(); boolean invert = childRexNodeLst.get(0).isAlwaysTrue(); @@ -473,29 +289,29 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { RexNode rangeL = childRexNodeLst.get(2); RexNode rangeH 
= childRexNodeLst.get(3); childRexNodeLst.clear(); - childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, rangeL, op)); - childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, op, rangeH)); + childRexNodeLst.add(rexBuilder.makeCall(cmpOp, rangeL, op)); + childRexNodeLst.add(rexBuilder.makeCall(cmpOp, op, rangeH)); } - expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); + expr = rexBuilder.makeCall(retType, calciteOp, childRexNodeLst); } else { retType = expr.getType(); } // TODO: Cast Function in Calcite have a bug where it infer type on cast throws // an exception - if (flattenExpr && (expr instanceof RexCall) + if (expr instanceof RexCall && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) { RexCall call = (RexCall) expr; - expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(), + expr = rexBuilder.makeCall(retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator())); } return expr; } - private boolean castExprUsingUDFBridge(GenericUDF gUDF) { + private static boolean castExprUsingUDFBridge(GenericUDF gUDF) { boolean castExpr = false; - if (gUDF != null && gUDF instanceof GenericUDFBridge) { + if (gUDF instanceof GenericUDFBridge) { String udfClassName = ((GenericUDFBridge) gUDF).getUdfClassName(); if (udfClassName != null) { int sp = udfClassName.lastIndexOf('.'); @@ -515,20 +331,17 @@ private boolean castExprUsingUDFBridge(GenericUDF gUDF) { return castExpr; } - private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List childRexNodeLst) - throws CalciteSemanticException { + public static RexNode handleExplicitCast(GenericUDF udf, RelDataType returnType, List childRexNodeLst, + RexBuilder rexBuilder) { RexNode castExpr = null; if (childRexNodeLst != null && childRexNodeLst.size() == 1) { - GenericUDF udf = func.getGenericUDF(); if ((udf instanceof GenericUDFToChar) || (udf instanceof GenericUDFToVarchar) || (udf instanceof GenericUDFToString) || (udf instanceof GenericUDFToDecimal) || (udf instanceof GenericUDFToDate) || (udf instanceof GenericUDFTimestamp) || (udf instanceof GenericUDFToTimestampLocalTZ) || (udf instanceof GenericUDFToBinary) || castExprUsingUDFBridge(udf)) { - castExpr = cluster.getRexBuilder().makeAbstractCast( - TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()), - childRexNodeLst.get(0)); + castExpr = rexBuilder.makeAbstractCast(returnType, childRexNodeLst.get(0)); } } @@ -550,10 +363,10 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c * It will be transformed into: * CASE WHEN =(x + y, 1) THEN 'fee' WHEN =(x + y, 2) THEN 'fie' ELSE null END */ - private List rewriteCaseChildren(ExprNodeGenericFuncDesc func, List childRexNodeLst) - throws SemanticException { - List newChildRexNodeLst = new ArrayList(); - if (FunctionRegistry.getNormalizedFunctionName(func.getFuncText()).equals("case")) { + public static List rewriteCaseChildren(String funcText, List childRexNodeLst, + RexBuilder rexBuilder) throws SemanticException { + List newChildRexNodeLst = new ArrayList<>(); + if (FunctionRegistry.getNormalizedFunctionName(funcText).equals("case")) { RexNode firstPred = childRexNodeLst.get(0); int length = childRexNodeLst.size() % 2 == 1 ? 
childRexNodeLst.size() : childRexNodeLst.size() - 1; @@ -561,7 +374,7 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c if (i % 2 == 1) { // We rewrite it newChildRexNodeLst.add( - cluster.getRexBuilder().makeCall( + rexBuilder.makeCall( SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); } else { newChildRexNodeLst.add(childRexNodeLst.get(i)); @@ -576,7 +389,7 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c } // Calcite always needs the else clause to be defined explicitly if (newChildRexNodeLst.size() % 2 == 0) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeNullLiteral( + newChildRexNodeLst.add(rexBuilder.makeNullLiteral( newChildRexNodeLst.get(newChildRexNodeLst.size()-1).getType())); } return newChildRexNodeLst; @@ -588,13 +401,13 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c * Calcite is more stricter than hive w.r.t type conversions. * If a CASE has branches with string/int/boolean branch types; there is no common type. */ - private List adjustCaseBranchTypes(List nodes, RelDataType retType) { + public static List adjustCaseBranchTypes(List nodes, RelDataType retType, RexBuilder rexBuilder) { List newNodes = new ArrayList<>(); for (int i = 0; i < nodes.size(); i++) { RexNode node = nodes.get(i); if ((i % 2 == 1 || i == nodes.size() - 1) && !node.getType().getSqlTypeName().equals(retType.getSqlTypeName())) { - newNodes.add(cluster.getRexBuilder().makeCast(retType, node)); + newNodes.add(rexBuilder.makeCast(retType, node)); } else { newNodes.add(node); } @@ -602,33 +415,33 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newNodes; } - private List rewriteExtractDateChildren(SqlOperator op, List childRexNodeLst) - throws SemanticException { + public static List rewriteExtractDateChildren(SqlOperator op, List childRexNodeLst, + RexBuilder rexBuilder) { List newChildRexNodeLst = new ArrayList<>(2); final boolean isTimestampLevel; if (op == HiveExtractDate.YEAR) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.YEAR)); isTimestampLevel = false; } else if (op == HiveExtractDate.QUARTER) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.QUARTER)); isTimestampLevel = false; } else if (op == HiveExtractDate.MONTH) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.MONTH)); isTimestampLevel = false; } else if (op == HiveExtractDate.WEEK) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.WEEK)); isTimestampLevel = false; } else if (op == HiveExtractDate.DAY) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.DAY)); isTimestampLevel = false; } else if (op == HiveExtractDate.HOUR) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.HOUR)); isTimestampLevel = true; } else if (op == HiveExtractDate.MINUTE) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.MINUTE)); isTimestampLevel = true; } else if (op == HiveExtractDate.SECOND) { - 
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.SECOND)); isTimestampLevel = true; } else { isTimestampLevel = false; @@ -640,76 +453,75 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c } else { // We need to add a cast to DATETIME Family if (isTimestampLevel) { - newChildRexNodeLst.add(makeCast(SqlTypeName.TIMESTAMP, child)); + newChildRexNodeLst.add(makeCast(SqlTypeName.TIMESTAMP, child, rexBuilder)); } else { - newChildRexNodeLst.add(makeCast(SqlTypeName.DATE, child)); + newChildRexNodeLst.add(makeCast(SqlTypeName.DATE, child, rexBuilder)); } } return newChildRexNodeLst; } - private RexNode makeCast(SqlTypeName typeName, final RexNode child) { - RelDataType sqlType = cluster.getTypeFactory().createSqlType(typeName); - RelDataType nullableType = cluster.getTypeFactory().createTypeWithNullability(sqlType, true); - return cluster.getRexBuilder().makeCast(nullableType, child); + private static RexNode makeCast(SqlTypeName typeName, final RexNode child, RexBuilder rexBuilder) { + RelDataType sqlType = rexBuilder.getTypeFactory().createSqlType(typeName); + RelDataType nullableType = rexBuilder.getTypeFactory().createTypeWithNullability(sqlType, true); + return rexBuilder.makeCast(nullableType, child); } - private List rewriteFloorDateChildren(SqlOperator op, List childRexNodeLst) - throws SemanticException { - List newChildRexNodeLst = new ArrayList(); + public static List rewriteFloorDateChildren(SqlOperator op, List childRexNodeLst, + RexBuilder rexBuilder) { + List newChildRexNodeLst = new ArrayList<>(); assert childRexNodeLst.size() == 1; newChildRexNodeLst.add(childRexNodeLst.get(0)); if (op == HiveFloorDate.YEAR) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.YEAR)); } else if (op == HiveFloorDate.QUARTER) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.QUARTER)); } else if (op == HiveFloorDate.MONTH) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.MONTH)); } else if (op == HiveFloorDate.WEEK) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.WEEK)); } else if (op == HiveFloorDate.DAY) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.DAY)); } else if (op == HiveFloorDate.HOUR) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.HOUR)); } else if (op == HiveFloorDate.MINUTE) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.MINUTE)); } else if (op == HiveFloorDate.SECOND) { - newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND)); + newChildRexNodeLst.add(rexBuilder.makeFlag(TimeUnitRange.SECOND)); } return newChildRexNodeLst; } - private List rewriteToDateChildren(List childRexNodeLst) { - List newChildRexNodeLst = new ArrayList(); + public static List rewriteToDateChildren(List childRexNodeLst, RexBuilder rexBuilder) { + List newChildRexNodeLst = new ArrayList<>(); assert childRexNodeLst.size() == 1; RexNode 
child = childRexNodeLst.get(0); if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval(child.getType())) { newChildRexNodeLst.add(child); } else { - newChildRexNodeLst.add(makeCast(SqlTypeName.TIMESTAMP, child)); + newChildRexNodeLst.add(makeCast(SqlTypeName.TIMESTAMP, child, rexBuilder)); } return newChildRexNodeLst; } - private List rewriteInClauseChildren(SqlOperator op, List childRexNodeLst) - throws SemanticException { + public static List rewriteInClauseChildren(SqlOperator op, List childRexNodeLst, + RexBuilder rexBuilder) throws SemanticException { assert op.getKind() == SqlKind.IN; RexNode firstPred = childRexNodeLst.get(0); List newChildRexNodeLst = new ArrayList(); for (int i = 1; i < childRexNodeLst.size(); i++) { newChildRexNodeLst.add( - cluster.getRexBuilder().makeCall( + rexBuilder.makeCall( SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); } return newChildRexNodeLst; } - private List rewriteCoalesceChildren( - ExprNodeGenericFuncDesc func, List childRexNodeLst) { + public static List rewriteCoalesceChildren( + List childRexNodeLst, RexBuilder rexBuilder) { final List convertedChildList = Lists.newArrayList(); assert childRexNodeLst.size() > 0; - final RexBuilder rexBuilder = cluster.getRexBuilder(); int i=0; for (; i < childRexNodeLst.size()-1; ++i) { // WHEN child not null THEN child @@ -741,77 +553,7 @@ private static boolean checkForStatefulFunctions(List list) { return false; } - private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException { - InputCtx ctxLookingFor = null; - - if (inputCtxs.size() == 1 && inputCtxs.get(0).hiveRR == null) { - ctxLookingFor = inputCtxs.get(0); - } else { - String tableAlias = col.getTabAlias(); - String colAlias = col.getColumn(); - int noInp = 0; - for (InputCtx ic : inputCtxs) { - if (tableAlias == null || ic.hiveRR.hasTableAlias(tableAlias)) { - if (ic.hiveRR.getPosition(colAlias) >= 0) { - ctxLookingFor = ic; - noInp++; - } - } - } - - if (noInp > 1) { - throw new RuntimeException("Ambiguous column mapping"); - } - } - - return ctxLookingFor; - } - - protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException { - //if this is co-rrelated we need to make RexCorrelVariable(with id and type) - // id and type should be retrieved from outerRR - InputCtx ic = getInputCtx(col); - if(ic == null) { - // we have correlated column, build data type from outer rr - RelDataType rowType = TypeConverter.getType(cluster, this.outerRR, null); - if (this.outerNameToPosMap.get(col.getColumn()) == null) { - throw new SemanticException(ErrorMsg.INVALID_COLUMN_NAME.getMsg(col.getColumn())); - } - - int pos = this.outerNameToPosMap.get(col.getColumn()); - CorrelationId colCorr = new CorrelationId(this.correlatedId); - RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr); - return cluster.getRexBuilder().makeFieldAccess(corExpr, pos); - } - int pos = ic.hiveNameToPosMap.get(col.getColumn()); - return cluster.getRexBuilder().makeInputRef( - ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema); - } - - private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE), - MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE); - - private static NlsString makeHiveUnicodeString(Interpretation interpretation, String text) { - return new HiveNlsString(interpretation, text, ConversionUtil.NATIVE_UTF16_CHARSET_NAME, SqlCollation.IMPLICIT); - } - - static class HiveNlsString extends NlsString { - - enum Interpretation { - CHAR, VARCHAR, 
STRING; - } - - public final Interpretation interpretation; - - public HiveNlsString(Interpretation interpretation, String value, String charsetName, SqlCollation collation) { - super(value, charsetName, collation); - this.interpretation = interpretation; - } - - } - protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException { - final RexBuilder rexBuilder = cluster.getRexBuilder(); final RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory(); final PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo(); final RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory); @@ -844,7 +586,6 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx case LONG: calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value)); break; - // TODO: is Decimal an exact numeric or approximate numeric? case DECIMAL: if (value instanceof HiveDecimal) { value = ((HiveDecimal) value).bigDecimalValue(); @@ -867,8 +608,6 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx // TODO: return createNullLiteral(literal); } BigDecimal bd = (BigDecimal) value; - BigInteger unscaled = bd.unscaledValue(); - int precision = bd.unscaledValue().abs().toString().length(); int scale = bd.scale(); @@ -877,11 +616,11 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx if (precision > scale) { // bd is greater than or equal to 1 relType = - cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL, precision, scale); + typeFactory.createSqlType(SqlTypeName.DECIMAL, precision, scale); } else { // bd is less than 1 relType = - cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL, scale + 1, scale); + typeFactory.createSqlType(SqlTypeName.DECIMAL, scale + 1, scale); } calciteLiteral = rexBuilder.makeExactLiteral(bd, relType); break; @@ -901,16 +640,19 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx if (value instanceof HiveChar) { value = ((HiveChar) value).getValue(); } - calciteLiteral = rexBuilder.makeCharLiteral(makeHiveUnicodeString(Interpretation.CHAR, (String) value)); + calciteLiteral = rexBuilder.makeCharLiteral( + RexNodeExprFactory.makeHiveUnicodeString(Interpretation.CHAR, (String) value)); break; case VARCHAR: if (value instanceof HiveVarchar) { value = ((HiveVarchar) value).getValue(); } - calciteLiteral = rexBuilder.makeCharLiteral(makeHiveUnicodeString(Interpretation.VARCHAR, (String) value)); + calciteLiteral = rexBuilder.makeCharLiteral( + RexNodeExprFactory.makeHiveUnicodeString(Interpretation.VARCHAR, (String) value)); break; case STRING: - calciteLiteral = rexBuilder.makeCharLiteral(makeHiveUnicodeString(Interpretation.STRING, (String) value)); + calciteLiteral = rexBuilder.makeCharLiteral( + RexNodeExprFactory.makeHiveUnicodeString(Interpretation.STRING, (String) value)); break; case DATE: final Date date = (Date) value; @@ -936,14 +678,10 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx break; case TIMESTAMPLOCALTZ: final TimestampString tsLocalTZString; - if (value == null) { - tsLocalTZString = null; - } else { - Instant i = ((TimestampTZ)value).getZonedDateTime().toInstant(); - tsLocalTZString = TimestampString - .fromMillisSinceEpoch(i.toEpochMilli()) - .withNanos(i.getNano()); - } + Instant i = ((TimestampTZ)value).getZonedDateTime().toInstant(); + tsLocalTZString = TimestampString + .fromMillisSinceEpoch(i.toEpochMilli()) + .withNanos(i.getNano()); calciteLiteral = 
rexBuilder.makeTimestampWithLocalTimeZoneLiteral( tsLocalTZString, rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE)); @@ -973,25 +711,10 @@ protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticEx case BINARY: case UNKNOWN: default: - throw new RuntimeException("UnSupported Literal"); + throw new RuntimeException("Unsupported Literal"); } return calciteLiteral; } - public static RexNode convert(RelOptCluster cluster, ExprNodeDesc joinCondnExprNode, - List inputRels, LinkedHashMap relToHiveRR, - Map> relToHiveColNameCalcitePosMap, boolean flattenExpr) - throws SemanticException { - List inputCtxLst = new ArrayList(); - - int offSet = 0; - for (RelNode r : inputRels) { - inputCtxLst.add(new InputCtx(r.getRowType(), relToHiveColNameCalcitePosMap.get(r), relToHiveRR - .get(r), offSet)); - offSet += r.getRowType().getFieldCount(); - } - - return (new RexNodeConverter(cluster, inputCtxLst, flattenExpr)).convert(joinCondnExprNode); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index a555749fb9..bb2b618f87 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -239,6 +239,8 @@ public static ASTNode buildAST(SqlOperator op, List children) { case IN: case BETWEEN: case ROW: + case ARRAY_VALUE_CONSTRUCTOR: + case MAP_VALUE_CONSTRUCTOR: case IS_NOT_TRUE: case IS_TRUE: case IS_NOT_FALSE: @@ -377,6 +379,8 @@ private static String getName(GenericUDF hiveUDF) { registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in")); registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between")); registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); + registerFunction("array", SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, hToken(HiveParser.Identifier, "array")); + registerFunction("map", SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR, hToken(HiveParser.Identifier, "map")); registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.Identifier, "isnotnull")); registerFunction("isnull", SqlStdOperatorTable.IS_NULL, hToken(HiveParser.Identifier, "isnull")); registerFunction("isnottrue", SqlStdOperatorTable.IS_NOT_TRUE, hToken(HiveParser.Identifier, "isnottrue")); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java index ed4a73e9f6..394bfdc590 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java @@ -29,6 +29,7 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.parser.SqlParserPos; @@ -46,6 +47,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.RowResolver; +import 
org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory.HiveNlsString; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -140,7 +142,7 @@ public static RelDataType getType(RelOptCluster cluster, RowResolver rr, } public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory) - throws CalciteSemanticException{ + throws CalciteSemanticException { RelDataType convertedType = null; switch (type.getCategory()) { @@ -272,6 +274,31 @@ public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dt throw new CalciteSemanticException("Union type is not supported", UnsupportedFeature.Union_type); } + /** + * This method exists because type information for CHAR literals + * is encoded within the literal value itself. The reason is that + * Calcite considers any character literal as CHAR type by default, + * while Hive is more flexible and may consider them STRING, VARCHAR, + * or CHAR. + */ + public static TypeInfo convertLiteralType(RexLiteral literal) { + if (literal.getType().getSqlTypeName() == SqlTypeName.CHAR) { + // Interpret + HiveNlsString string = (HiveNlsString) RexLiteral.value(literal); + switch (string.interpretation) { + case STRING: + return TypeInfoFactory.stringTypeInfo; + case VARCHAR: + return TypeInfoFactory.getVarcharTypeInfo( + literal.getType().getPrecision()); + case CHAR: + return TypeInfoFactory.getCharTypeInfo( + literal.getType().getPrecision()); + } + } + return TypeConverter.convertPrimitiveType(literal.getType()); + } + public static TypeInfo convert(RelDataType rType) { if (rType.isStruct()) { return convertStructType(rType); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBProcFactory.java index eb0d8aaca8..b81e51ff86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/LBProcFactory.java @@ -42,7 +42,7 @@ @Override protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop, - TableScanOperator top) throws SemanticException, UDFArgumentException { + TableScanOperator top) throws SemanticException { LBOpWalkerCtx owc = (LBOpWalkerCtx) procCtx; // Otherwise this is not a sampling predicate and we need to ExprNodeDesc predicate = fop.getConf().getPredicate(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index ee3aaa5799..da3d212b6e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -19,6 +19,7 @@ import com.google.common.base.Function; import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ImmutableBiMap; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; @@ -26,6 +27,8 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; +import java.util.Map.Entry; +import java.util.Optional; import java.util.regex.Pattern; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; @@ -78,6 +81,7 @@ import org.apache.calcite.rel.convert.ConverterImpl; import 
org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; @@ -100,9 +104,12 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexExecutor; +import org.apache.calcite.rex.RexFieldAccess; import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.schema.SchemaPlus; @@ -118,7 +125,6 @@ import org.apache.calcite.sql.SqlWindow; import org.apache.calcite.sql.dialect.HiveSqlDialect; import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.Frameworks; @@ -147,11 +153,27 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.NotNullConstraint; import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.optimizer.calcite.*; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; @@ -164,6 +186,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRexExprList; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; 
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; @@ -242,10 +265,11 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.MaterializedViewRewritingRelVisitor; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; -import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; +import org.apache.hadoop.hive.ql.parse.type.FunctionHelper; +import org.apache.hadoop.hive.ql.parse.type.FunctionHelper.AggregateInfo; +import org.apache.hadoop.hive.ql.parse.type.HiveFunctionHelper; import org.apache.hadoop.hive.ql.parse.type.JoinTypeCheckCtx; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.PlanModifierForReturnPath; -import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter; @@ -259,26 +283,19 @@ import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; +import org.apache.hadoop.hive.ql.parse.type.RexNodeTypeCheck; import org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx; import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource; import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -1774,13 +1791,13 @@ private RowResolver genRowResolver(Operator op, QB qb) { private class CalcitePlannerAction implements Frameworks.PlannerAction { private RelOptCluster cluster; private RelOptSchema relOptSchema; + private FunctionHelper functionHelper; private final Map partitionCache; private final Map colStatsCache; private final ColumnAccessInfo columnAccessInfo; private Map viewProjectToTableSchema; - //correlated vars across subqueries within same query needs to have different ID - // this will be used in RexNodeConverter to create cor var + // correlated vars across subqueries within same query needs to have different ID 
private int subqueryId; // this is to keep track if a subquery is correlated and contains aggregate @@ -1819,6 +1836,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu this.cluster = optCluster; this.relOptSchema = relOptSchema; + this.functionHelper = new HiveFunctionHelper(rexBuilder); PerfLogger perfLogger = SessionState.getPerfLogger(); // 1. Gen Calcite Plan @@ -1836,7 +1854,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation"); // Create executor - RexExecutor executorProvider = new HiveRexExecutorImpl(optCluster); + RexExecutor executorProvider = new HiveRexExecutorImpl(); calciteGenPlan.getCluster().getPlanner().setExecutor(executorProvider); // We need to get the ColumnAccessInfo and viewToTableSchema for views. @@ -2734,18 +2752,13 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r } else if (unparseTranslator != null && unparseTranslator.isEnabled()) { genAllExprNodeDesc(joinCond, input, jCtx); } - Map exprNodes = ExprNodeTypeCheck.genExprNodeJoinCond( - joinCond, jCtx); + Map exprNodes = RexNodeTypeCheck.genExprNodeJoinCond( + joinCond, jCtx, cluster.getRexBuilder()); if (jCtx.getError() != null) { throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(), jCtx.getError())); } - ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond); - List inputRels = new ArrayList(); - inputRels.add(leftRel); - inputRels.add(rightRel); - calciteJoinCond = RexNodeConverter.convert(cluster, joinCondnExprNode, inputRels, - relToHiveRR, relToHiveColNameCalcitePosMap, false); + calciteJoinCond = exprNodes.get(joinCond); } else { calciteJoinCond = cluster.getRexBuilder().makeLiteral(true); } @@ -2802,8 +2815,13 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r RexUtil.composeConjunction(cluster.getRexBuilder(), ImmutableList.of(remainingEquiCond, nonEquiConds), false) : nonEquiConds; + final RelDataType combinedRowType = SqlValidatorUtil.createJoinType( + cluster.getTypeFactory(), inputRels[0].getRowType(), inputRels[1].getRowType(), + null, ImmutableList.of()); topRel = HiveSemiJoin.getSemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - inputRels[0], inputRels[1], calciteJoinCond); + inputRels[0], inputRels[1], + HiveCalciteUtil.fixUp(cluster.getRexBuilder(), + calciteJoinCond, RelOptUtil.getFieldTypeList(combinedRowType))); // Create join RR: we need to check whether we need to update left RR in case // previous call to projectNonColumnEquiConditions updated it @@ -2845,7 +2863,14 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r LOG.warn("Duplicates detected when adding columns to RR: see previous message"); } } else { - topRel = HiveJoin.getJoin(cluster, leftRel, rightRel, calciteJoinCond, calciteJoinType); + final RelDataType combinedRowType = SqlValidatorUtil.createJoinType( + cluster.getTypeFactory(), leftRel.getRowType(), rightRel.getRowType(), + null, ImmutableList.of()); + topRel = HiveJoin.getJoin( + cluster, leftRel, rightRel, + HiveCalciteUtil.fixUp(cluster.getRexBuilder(), + calciteJoinCond, RelOptUtil.getFieldTypeList(combinedRowType)), + calciteJoinType); topRR = RowResolver.getCombinedRR(leftRR, rightRR); if (namedColumns != null) { List tableAliases = new ArrayList<>(); @@ -3235,13 +3260,13 @@ private TableType obtainTableType(Table tabMetaData) { return TableType.NATIVE; } 
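Both the semi-join and the join branch in the hunk above now pass the translated condition through HiveCalciteUtil.fixUp against the combined left+right row type before the join operator is built. Below is a minimal, self-contained sketch of that nullability fix-up pattern using only Calcite API that this patch itself relies on (SqlValidatorUtil.createJoinType, RelOptUtil.getFieldTypeList, RexUtil.fixUp); the class and method names are illustrative and not part of this change.

import java.util.List;

import com.google.common.collect.ImmutableList;

import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.validate.SqlValidatorUtil;

/** Illustrative helper; not Hive code. */
final class JoinConditionFixUpSketch {

  private JoinConditionFixUpSketch() {
  }

  /**
   * Rewrites the join condition so that every input reference carries the
   * nullability of the corresponding field in the join's combined row type.
   */
  static RexNode fixUpJoinCondition(RexBuilder rexBuilder, RelNode left, RelNode right,
      RexNode joinCondition) {
    // Combined row type seen by the condition: left fields followed by right fields.
    RelDataType combinedRowType = SqlValidatorUtil.createJoinType(
        rexBuilder.getTypeFactory(), left.getRowType(), right.getRowType(),
        null, ImmutableList.of());
    // RexUtil.fixUp aligns the types of input references (notably their nullability)
    // with the supplied field type list.
    List<RexNode> fixed = RexUtil.fixUp(
        rexBuilder, ImmutableList.of(joinCondition),
        RelOptUtil.getFieldTypeList(combinedRowType));
    return fixed.get(0);
  }
}

The actual HiveCalciteUtil.fixUp implementation may differ in detail; the sketch only shows the idea that a condition built against the inputs' original types needs its reference types realigned with the row type the join will expose.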
- private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, + private RelNode genFilterRelNode(ASTNode filterNode, RelNode srcRel, ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean useCaching) throws SemanticException { - ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel), - outerRR, null, useCaching); - if (filterCondn instanceof ExprNodeConstantDesc - && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { + RexNode filterExpression = genRexNode(filterNode, relToHiveRR.get(srcRel), + outerRR, null, useCaching, cluster.getRexBuilder()); + if (RexUtil.isLiteral(filterExpression, false) + && filterExpression.getType().getSqlTypeName() != SqlTypeName.BOOLEAN) { // queries like select * from t1 where 'foo'; // Calcite's rule PushFilterThroughProject chokes on it. Arguably, we // can insert a cast to @@ -3252,16 +3277,16 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, throw new CalciteSemanticException("Filter expression with non-boolean return type.", UnsupportedFeature.Filter_expression_with_non_boolean_return_type); } - ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap - .get(srcRel); - RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(), - outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR, - HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES), - 0, true, subqueryId).convert(filterCondn); - RexNode factoredFilterExpr = RexUtil - .pullFactors(cluster.getRexBuilder(), convertedFilterExpr); - RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - srcRel, factoredFilterExpr); + final ImmutableMap hiveColNameCalcitePosMap = + this.relToHiveColNameCalcitePosMap.get(srcRel); + filterExpression = new CorrelationConverter( + new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)), + outerNameToPosMap, outerRR, subqueryId).apply(filterExpression); + RexNode factoredFilterExpression = RexUtil + .pullFactors(cluster.getRexBuilder(), filterExpression); + RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), srcRel, + HiveCalciteUtil.fixUp(cluster.getRexBuilder(), + factoredFilterExpression, RelOptUtil.getFieldTypeList(srcRel.getRowType()))); this.relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); relToHiveColNameCalcitePosMap.put(filterRel, hiveColNameCalcitePosMap); @@ -3269,6 +3294,60 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, return filterRel; } + /** + * Shuttle that replaces certain references with correlation variables + * if needed. + */ + private class CorrelationConverter extends RexShuttle { + private final InputContext inputContext; + private final ImmutableMap outerPositionToColumnName; + private final RowResolver outerRowResolver; + private final int correlatedId; + + private CorrelationConverter(InputContext inputContext, + ImmutableMap outerColumnNameToPosition, RowResolver outerRowResolver, + int correlatedId) { + this.inputContext = inputContext; + this.outerPositionToColumnName = outerColumnNameToPosition == null ? 
+ null : ImmutableBiMap.copyOf(outerColumnNameToPosition).inverse(); + this.outerRowResolver = outerRowResolver; + this.correlatedId = correlatedId; + } + + @Override + public RexNode visitInputRef(RexInputRef col) { + InputContext context = null; + if (inputContext.inputRowResolver == null) { + context = inputContext; + } else { + int index = col.getIndex(); + String colName = inputContext.positionToColumnName.get(index); + if (colName != null) { + context = inputContext; + } + } + + if(context == null) { + // we have correlated column, build data type from outer rr + RelDataType rowType; + try { + rowType = TypeConverter.getType(cluster, outerRowResolver, null); + } catch (CalciteSemanticException e) { + throw new RuntimeException("Error converting type", e); + } + int index = col.getIndex() - inputContext.inputRowType.getFieldList().size(); + if (outerPositionToColumnName.get(index) == null) { + throw new RuntimeException(ErrorMsg.INVALID_COLUMN_NAME.getMsg()); + } + CorrelationId colCorr = new CorrelationId(correlatedId); + RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr); + return cluster.getRexBuilder().makeFieldAccess(corExpr, index); + } + int pos = col.getIndex(); + return cluster.getRexBuilder().makeInputRef( + context.inputRowType.getFieldList().get(pos).getType(), pos); + } + } private RelNode genLateralViewPlans(ASTNode lateralView, Map aliasToRel) throws SemanticException { @@ -3324,19 +3403,18 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al // Output types. They will be the concatenation of the input refs types and // the types of the expressions for the lateral view generated rows // Generate all expressions from lateral view - ExprNodeDesc valuesExpr = genExprNodeDesc(valuesClause, inputRR, false, false); - RexCall convertedOriginalValuesExpr = (RexCall) new RexNodeConverter(this.cluster, inputRel.getRowType(), - inputPosMap, 0, false).convert(valuesExpr); - RelDataType valuesRowType = ((ArraySqlType) convertedOriginalValuesExpr.getType()).getComponentType(); + RexCall valuesExpr = (RexCall) genRexNode( + valuesClause, inputRR, false, false, cluster.getRexBuilder()); + RelDataType valuesRowType = valuesExpr.getType().getComponentType(); List newStructExprs = new ArrayList<>(); - for (RexNode structExpr : convertedOriginalValuesExpr.getOperands()) { + for (RexNode structExpr : valuesExpr.getOperands()) { RexCall structCall = (RexCall) structExpr; List exprs = new ArrayList<>(inputRefs); exprs.addAll(structCall.getOperands()); newStructExprs.add(rexBuilder.makeCall(structCall.op, exprs)); } RexNode convertedFinalValuesExpr = - rexBuilder.makeCall(convertedOriginalValuesExpr.op, newStructExprs); + rexBuilder.makeCall(valuesExpr.op, newStructExprs); // The return type will be the concatenation of input type and original values type RelDataType retType = SqlValidatorUtil.deriveJoinRowType(inputRel.getRowType(), valuesRowType, JoinRelType.INNER, dtFactory, null, ImmutableList.of()); @@ -3394,7 +3472,7 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al columnAliases.add(SemanticAnalyzer.getColumnInternalName(i)); } } - ListTypeInfo listTypeInfo = (ListTypeInfo) valuesExpr.getTypeInfo(); // Array should have ListTypeInfo + ListTypeInfo listTypeInfo = (ListTypeInfo) TypeConverter.convert(valuesExpr.getType()); // Array should have ListTypeInfo StructTypeInfo typeInfos = (StructTypeInfo) listTypeInfo.getListElementTypeInfo(); // Within the list, we extract types for (int i = 0, j = 0; i < columnAliases.size(); i++) { String 
internalColName; @@ -3470,26 +3548,21 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean forHavingClause) throws SemanticException { - - Map subQueryToRelNode = new HashMap<>(); - boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, - subQueryToRelNode); + final Map subQueryToRelNode = new HashMap<>(); + boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, subQueryToRelNode); if(isSubQuery) { - ExprNodeDesc subQueryExpr = genExprNodeDesc(searchCond, relToHiveRR.get(srcRel), - outerRR, subQueryToRelNode, forHavingClause); + RexNode filterExpression = genRexNode(searchCond, relToHiveRR.get(srcRel), + outerRR, subQueryToRelNode, forHavingClause, cluster.getRexBuilder()); ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap - .get(srcRel); - RexNode convertedFilterLHS = new RexNodeConverter(cluster, srcRel.getRowType(), - outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), - outerRR, HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES), - 0, true, subqueryId).convert(subQueryExpr); + .get(srcRel); + filterExpression = new CorrelationConverter( + new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)), + outerNameToPosMap, outerRR, subqueryId).apply(filterExpression); RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - srcRel, convertedFilterLHS); - - this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap - .get(srcRel)); + srcRel, filterExpression); + relToHiveColNameCalcitePosMap.put(filterRel, relToHiveColNameCalcitePosMap.get(srcRel)); relToHiveRR.put(filterRel, relToHiveRR.get(srcRel)); return filterRel; } else { @@ -3511,42 +3584,17 @@ private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, ImmutableMap m_aggParams; - private final TypeInfo m_returnType; - private final String m_udfName; - private final boolean m_distinct; - - private AggInfo(List aggParams, TypeInfo returnType, String udfName, - boolean isDistinct) { - m_aggParams = aggParams; - m_returnType = returnType; - m_udfName = udfName; - m_distinct = isDistinct; - } - } - - private AggregateCall convertGBAgg(AggInfo agg, List gbChildProjLst, RexNodeConverter converter, + private AggregateCall convertGBAgg(AggregateInfo agg, List gbChildProjLst, HashMap rexNodeToPosMap, Integer childProjLstIndx) throws SemanticException { - // 1. Get agg fn ret type in Calcite - RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType, + RelDataType aggFnRetType = TypeConverter.convert(agg.getReturnType(), this.cluster.getTypeFactory()); // 2. Convert Agg Fn args and type of args to Calcite - // TODO: Does HQL allows expressions as aggregate args or can it only be - // projections from child? 
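With this rewrite, convertGBAgg receives aggregate arguments that are already RexNodes and only has to find, or create, their slot in the group-by child project, keyed by the expression's string form (see the loop that follows). A small sketch of that deduplication idiom is shown below; the class and method names are hypothetical and only illustrate the technique.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.calcite.rex.RexNode;

/** Illustrative helper; not Hive code. */
final class ChildProjectDedup {

  private final List<RexNode> childProjects = new ArrayList<>();
  private final Map<String, Integer> positionByDigest = new HashMap<>();

  /** Returns the ordinal of expr in the child project, appending it if absent. */
  int ordinalOf(RexNode expr) {
    return positionByDigest.computeIfAbsent(expr.toString(), digest -> {
      // Structurally identical expressions print identically, so a group-by key and
      // an aggregate argument that share an expression are projected only once.
      childProjects.add(expr);
      return childProjects.size() - 1;
    });
  }

  List<RexNode> childProjects() {
    return childProjects;
  }
}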
- Integer inputIndx; - List argList = new ArrayList(); - RexNode rexNd = null; - RelDataTypeFactory dtFactory = this.cluster.getTypeFactory(); - ImmutableList.Builder aggArgRelDTBldr = new ImmutableList.Builder(); - for (ExprNodeDesc expr : agg.m_aggParams) { - rexNd = converter.convert(expr); - inputIndx = rexNodeToPosMap.get(rexNd.toString()); + List argList = new ArrayList<>(); + ImmutableList.Builder aggArgRelDTBldr = ImmutableList.builder(); + for (RexNode rexNd : agg.getParameters()) { + Integer inputIndx = rexNodeToPosMap.get(rexNd.toString()); if (inputIndx == null) { gbChildProjLst.add(rexNd); rexNodeToPosMap.put(rexNd.toString(), childProjLstIndx); @@ -3555,35 +3603,28 @@ private AggregateCall convertGBAgg(AggInfo agg, List gbChildProjLst, Re } argList.add(inputIndx); - // TODO: does arg need type cast? - aggArgRelDTBldr.add(TypeConverter.convert(expr.getTypeInfo(), dtFactory)); + aggArgRelDTBldr.add(rexNd.getType()); } // 3. Get Aggregation FN from Calcite given name, ret type and input arg // type - final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName, agg.m_distinct, + final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.getAggregateName(), agg.isDistinct(), aggArgRelDTBldr.build(), aggFnRetType); - return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null); + return new AggregateCall(aggregation, agg.isDistinct(), argList, aggFnRetType, null); } - private RelNode genGBRelNode(List gbExprs, List aggInfoLst, + private RelNode genGBRelNode(List gbExprs, List aggInfoLst, List groupSets, RelNode srcRel) throws SemanticException { - ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); - RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), posMap, - 0, false); - final boolean hasGroupSets = groupSets != null && !groupSets.isEmpty(); final List gbChildProjLst = Lists.newArrayList(); final HashMap rexNodeToPosMap = new HashMap(); final List groupSetPositions = Lists.newArrayList(); Integer gbIndx = 0; - RexNode rnd; - for (ExprNodeDesc key : gbExprs) { - rnd = converter.convert(key); - gbChildProjLst.add(rnd); + for (RexNode gbExpr : gbExprs) { + gbChildProjLst.add(gbExpr); groupSetPositions.add(gbIndx); - rexNodeToPosMap.put(rnd.toString(), gbIndx); + rexNodeToPosMap.put(gbExpr.toString(), gbIndx); gbIndx++; } final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions); @@ -3603,9 +3644,9 @@ private RelNode genGBRelNode(List gbExprs, List aggInfoLs } List aggregateCalls = Lists.newArrayList(); - for (AggInfo agg : aggInfoLst) { - aggregateCalls.add(convertGBAgg(agg, gbChildProjLst, converter, rexNodeToPosMap, - gbChildProjLst.size())); + for (AggregateInfo agg : aggInfoLst) { + aggregateCalls.add( + convertGBAgg(agg, gbChildProjLst, rexNodeToPosMap, gbChildProjLst.size())); } if (hasGroupSets) { // Create GroupingID column @@ -3621,7 +3662,12 @@ private RelNode genGBRelNode(List gbExprs, List aggInfoLs // first element from srcRel gbChildProjLst.add(this.cluster.getRexBuilder().makeInputRef(srcRel, 0)); } - RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null); + + // Create input project fixing up nullability of inputs + RelNode gbInputRel = HiveProject.create( + srcRel, + RexUtil.fixUp(cluster.getRexBuilder(), gbChildProjLst, RelOptUtil.getFieldTypeList(srcRel.getRowType())), + null); HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, groupSet, transformedGroupSets, 
aggregateCalls); @@ -3673,120 +3719,59 @@ private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, } private void addToGBExpr(RowResolver groupByOutputRowResolver, - RowResolver groupByInputRowResolver, ASTNode grpbyExpr, ExprNodeDesc grpbyExprNDesc, - List gbExprNDescLst, List outputColumnNames) { - // TODO: Should we use grpbyExprNDesc.getTypeInfo()? what if expr is - // UDF + RowResolver groupByInputRowResolver, ASTNode grpbyExpr, RexNode grpbyExprNDesc, + List gbExprNDescLst, List outputColumnNames) { int i = gbExprNDescLst.size(); String field = SemanticAnalyzer.getColumnInternalName(i); outputColumnNames.add(field); gbExprNDescLst.add(grpbyExprNDesc); - ColumnInfo oColInfo = new ColumnInfo(field, grpbyExprNDesc.getTypeInfo(), null, false); + ColumnInfo oColInfo = new ColumnInfo(field, TypeConverter.convert(grpbyExprNDesc.getType()), null, false); groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo); addAlternateGByKeyMappings(grpbyExpr, oColInfo, groupByInputRowResolver, groupByOutputRowResolver); } - private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) + private AggregateInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver inputRR) throws SemanticException { - AggInfo aInfo = null; - - // 1 Convert UDAF Params to ExprNodeDesc - ArrayList aggParameters = new ArrayList(); + List aggParameters = new ArrayList<>(); for (int i = 1; i <= aggFnLstArgIndx; i++) { - ASTNode paraExpr = (ASTNode) aggAst.getChild(i); - ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR); - aggParameters.add(paraExprNode); + RexNode parameterExpr = genRexNode( + (ASTNode) aggAst.getChild(i), inputRR, cluster.getRexBuilder()); + aggParameters.add(parameterExpr); } - - // 2. Is this distinct UDAF boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI; - - // 3. 
Determine type of UDAF - TypeInfo udafRetType = null; - - // 3.1 Obtain UDAF name + boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; String aggName = unescapeIdentifier(aggAst.getChild(0).getText()); - // 3.2 Rank functions type is 'int'/'double' - if (FunctionRegistry.isRankingFunction(aggName)) { - if (aggName.equalsIgnoreCase("percent_rank")) { - udafRetType = TypeInfoFactory.doubleTypeInfo; - } else { - udafRetType = TypeInfoFactory.intTypeInfo; - } - } else { - // 3.3 Try obtaining UDAF evaluators to determine the ret type - try { - boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR; - - // 3.3.1 Get UDAF Evaluator - Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, - isDistinct); - - GenericUDAFEvaluator genericUDAFEvaluator = null; - if (aggName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) - || aggName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { - List originalParameterTypeInfos = SemanticAnalyzer - .getWritableObjectInspector(aggParameters); - genericUDAFEvaluator = FunctionRegistry.getGenericWindowingEvaluator(aggName, - originalParameterTypeInfos, isDistinct, isAllColumns); - GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, - aggParameters); - udafRetType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo(); - } else { - genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(aggName, aggParameters, - aggAst, isDistinct, isAllColumns); - assert (genericUDAFEvaluator != null); - - // 3.3.2 Get UDAF Info using UDAF Evaluator - GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, - aggParameters); - if (FunctionRegistry.pivotResult(aggName)) { - udafRetType = ((ListTypeInfo)udaf.returnType).getListElementTypeInfo(); - } else { - udafRetType = udaf.returnType; - } - } - } catch (Exception e) { - LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName - + ", trying to translate to GenericUDF"); - } + AggregateInfo aInfo = functionHelper.getWindowAggregateFunctionInfo( + isDistinct, isAllColumns, aggName, aggParameters); - // 3.4 Try GenericUDF translation - if (udafRetType == null) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - // We allow stateful functions in the SELECT list (but nowhere else) - tcCtx.setAllowStatefulFunctions(true); - tcCtx.setAllowDistinctFunctions(false); - ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx); - udafRetType = exp.getTypeInfo(); - } + // If that did not work, try GenericUDF translation + if (aInfo == null) { + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder()); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + tcCtx.setAllowDistinctFunctions(false); + RexNode exp = genRexNode((ASTNode) aggAst.getChild(0), inputRR, tcCtx); + aInfo = new AggregateInfo( + aggParameters, TypeConverter.convert(exp.getType()), aggName, isDistinct); } - // 4. Construct AggInfo - aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct); - return aInfo; } /** - * Generate GB plan. - * - * @param qb - * @param srcRel - * @return TODO: 1. Grouping Sets (roll up..) - * @throws SemanticException + * Generate a group by plan. */ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { - RelNode gbRel = null; + RelNode groupByRel = null; QBParseInfo qbp = getQBParseInfo(qb); // 1. 
Gather GB Expressions (AST) (GB + Aggregations) // NOTE: Multi Insert is not supported - String detsClauseName = qbp.getClauseNames().iterator().next(); + String destClauseName = qbp.getClauseNames().iterator().next(); // Check and transform group by *. This will only happen for select distinct *. // Here the "genSelectPlan" is being leveraged. // The main benefits are (1) remove virtual columns that should @@ -3794,7 +3779,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException // so that view is supported. The drawback is that an additional SEL op is added. If it is // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match // SEL%SEL% rule. - ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName); + ASTNode selExprList = qb.getParseInfo().getSelForClause(destClauseName); SubQueryUtils.checkForTopLevelSubqueries(selExprList); if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) { @@ -3802,8 +3787,8 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { // As we said before, here we use genSelectLogicalPlan to rewrite AllColRef srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null, null, true).getKey(); - RowResolver rr = this.relToHiveRR.get(srcRel); - qbp.setSelExprForClause(detsClauseName, genSelectDIAST(rr)); + RowResolver rr = relToHiveRR.get(srcRel); + qbp.setSelExprForClause(destClauseName, genSelectDIAST(rr)); } } @@ -3813,18 +3798,17 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException return null; } - List grpByAstExprs = getGroupByForClause(qbp, detsClauseName); - Map aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); - boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; - boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true - : false; + List groupByNodes = getGroupByForClause(qbp, destClauseName); + Map aggregationTrees = qbp.getAggregationExprsForClause(destClauseName); + boolean hasGrpByAstExprs = groupByNodes != null && !groupByNodes.isEmpty(); + boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty(); final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty() || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty()); // 2. 
Sanity check if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) - && qbp.getDistinctFuncExprsForClause(detsClauseName).size() > 1) { + && qbp.getDistinctFuncExprsForClause(destClauseName).size() > 1) { throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg()); } if (cubeRollupGrpSetPresent) { @@ -3833,9 +3817,9 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException } if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { - checkExpressionsForGroupingSet(grpByAstExprs, qb.getParseInfo() - .getDistinctFuncExprsForClause(detsClauseName), aggregationTrees, - this.relToHiveRR.get(srcRel)); + checkExpressionsForGroupingSet(groupByNodes, + qb.getParseInfo().getDistinctFuncExprsForClause(destClauseName), + aggregationTrees, relToHiveRR.get(srcRel)); if (qbp.getDestGroupingSets().size() > conf .getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY)) { @@ -3847,10 +3831,9 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException } } - if (hasGrpByAstExprs || hasAggregationTrees) { - ArrayList gbExprNDescLst = new ArrayList(); - ArrayList outputColumnNames = new ArrayList(); + List groupByExpressions = new ArrayList<>(); + List outputColumnNames = new ArrayList<>(); // 3. Input, Output Row Resolvers RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel); @@ -3859,29 +3842,30 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException if (hasGrpByAstExprs) { // 4. Construct GB Keys (ExprNode) - for (int i = 0; i < grpByAstExprs.size(); ++i) { - ASTNode grpbyExpr = grpByAstExprs.get(i); - Map astToExprNDescMap = genAllExprNodeDesc(grpbyExpr, groupByInputRowResolver); - ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr); - if (grpbyExprNDesc == null) { - throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump(), + for (int i = 0; i < groupByNodes.size(); ++i) { + ASTNode groupByNode = groupByNodes.get(i); + Map astToRexNodeMap = genAllRexNode( + groupByNode, groupByInputRowResolver, cluster.getRexBuilder()); + RexNode groupByExpression = astToRexNodeMap.get(groupByNode); + if (groupByExpression == null) { + throw new CalciteSemanticException("Invalid Column Reference: " + groupByNode.dump(), UnsupportedFeature.Invalid_column_reference); } - addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr, - grpbyExprNDesc, gbExprNDescLst, outputColumnNames); + addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, groupByNode, + groupByExpression, groupByExpressions, outputColumnNames); } } // 5. GroupingSets, Cube, Rollup - int groupingColsSize = gbExprNDescLst.size(); + int groupingColsSize = groupByExpressions.size(); List groupingSets = null; if (cubeRollupGrpSetPresent) { - groupingSets = getGroupByGroupingSetsForClause(qbp, detsClauseName).getRight(); + groupingSets = getGroupByGroupingSetsForClause(qbp, destClauseName).getRight(); } // 6. 
Construct aggregation function Info - ArrayList aggregations = new ArrayList(); + ArrayList aggregations = new ArrayList(); if (hasAggregationTrees) { assert (aggregationTrees != null); for (ASTNode value : aggregationTrees.values()) { @@ -3892,26 +3876,20 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; // 6.2 Convert UDAF Params to ExprNodeDesc - ArrayList aggParameters = new ArrayList(); + List aggParameters = new ArrayList<>(); for (int i = 1; i < value.getChildCount(); i++) { - ASTNode paraExpr = (ASTNode) value.getChild(i); - ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver); - aggParameters.add(paraExprNode); + RexNode parameterExpr = genRexNode( + (ASTNode) value.getChild(i), groupByInputRowResolver, cluster.getRexBuilder()); + aggParameters.add(parameterExpr); } - Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, - isDistinct); - GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator( - aggName, aggParameters, value, isDistinct, isAllColumns); - assert (genericUDAFEvaluator != null); - GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, - aggParameters); - AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct); + AggregateInfo aInfo = functionHelper.getAggregateFunctionInfo( + isDistinct, isAllColumns, aggName, aggParameters); aggregations.add(aInfo); String field = getColumnInternalName(groupingColsSize + aggregations.size() - 1); outputColumnNames.add(field); - groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType, - "", false)); + groupByOutputRowResolver.putExpression(value, + new ColumnInfo(field, aInfo.getReturnType(), "", false)); } } @@ -3928,12 +3906,12 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException } // 8. 
We create the group_by operator - gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel); - relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver)); - this.relToHiveRR.put(gbRel, groupByOutputRowResolver); + groupByRel = genGBRelNode(groupByExpressions, aggregations, groupingSets, srcRel); + relToHiveColNameCalcitePosMap.put(groupByRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver)); + relToHiveRR.put(groupByRel, groupByOutputRowResolver); } - return gbRel; + return groupByRel; } /** @@ -3948,8 +3926,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException * @throws SemanticException */ private RelNode genOBLogicalPlan(QB qb, Pair selPair, - boolean outermostOB) throws SemanticException { - + boolean outermostOB) throws SemanticException { QBParseInfo qbp = getQBParseInfo(qb); String dest = qbp.getClauseNames().iterator().next(); ASTNode obAST = qbp.getOrderByForClause(dest); @@ -3982,8 +3959,7 @@ private RelNode genOBLogicalPlan(QB qb, Pair selPair, } private RelNode genSBLogicalPlan(QB qb, Pair selPair, - boolean outermostOB) throws SemanticException { - + boolean outermostOB) throws SemanticException { QBParseInfo qbp = getQBParseInfo(qb); String dest = qbp.getClauseNames().iterator().next(); ASTNode sbAST = qbp.getSortByForClause(dest); @@ -4023,7 +3999,7 @@ private RelNode genSBLogicalPlan(QB qb, Pair selPair, // - Add Child Project Rel if needed, // - Generate Output RR, input Sel Rel for top constraining Sel private OBLogicalPlanGenState beginGenOBLogicalPlan( - ASTNode obAST, Pair selPair, boolean outermostOB) throws SemanticException { + ASTNode obAST, Pair selPair, boolean outermostOB) throws SemanticException { // selPair.getKey() is the operator right before OB // selPair.getValue() is RR which only contains columns needed in result // set. Extra columns needed by order by will be absent from it. 
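When the sort key is not already a column of the input, the planner must project it as an extra (virtual) column and sort on that new ordinal; that is why the comment above notes that columns needed only by ORDER BY are absent from the select row resolver. The following is a minimal sketch of that index selection, assuming illustrative names that are not part of this patch.

import java.util.List;

import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;

/** Illustrative helper; not Hive code. */
final class OrderKeyIndexing {

  private OrderKeyIndexing() {
  }

  /**
   * Returns the field ordinal the collation should reference for the given key,
   * registering a virtual column when the key is a compound expression.
   */
  static int collationIndex(RexNode orderKey, int inputFieldCount, List<RexNode> virtualColumns) {
    if (orderKey instanceof RexInputRef) {
      // Plain column reference: sort directly on the existing input field.
      return ((RexInputRef) orderKey).getIndex();
    }
    // Compound expression: it will be projected after the input fields, so its
    // ordinal is inputFieldCount plus its position among the new virtual columns.
    int index = inputFieldCount + virtualColumns.size();
    virtualColumns.add(orderKey);
    return index;
  }
}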
@@ -4041,15 +4017,12 @@ private OBLogicalPlanGenState beginGenOBLogicalPlan( RowResolver inputRR = relToHiveRR.get(srcRel); RowResolver outputRR = new RowResolver(); - RexNode rnd; - RexNodeConverter converter = new RexNodeConverter(cluster, srcRel.getRowType(), - relToHiveColNameCalcitePosMap.get(srcRel), 0, false); int srcRelRecordSz = srcRel.getRowType().getFieldCount(); for (int i = 0; i < obASTExprLst.size(); i++) { // 2.1 Convert AST Expr to ExprNode - ASTNode obASTExpr = (ASTNode) obASTExprLst.get(i); - ASTNode nullObASTExpr = (ASTNode) obASTExpr.getChild(0); + ASTNode orderByNode = (ASTNode) obASTExprLst.get(i); + ASTNode nullObASTExpr = (ASTNode) orderByNode.getChild(0); ASTNode ref = (ASTNode) nullObASTExpr.getChild(0); boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); @@ -4066,25 +4039,24 @@ private OBLogicalPlanGenState beginGenOBLogicalPlan( "the position alias will be ignored."); } } else { - ExprNodeDesc obExprNDesc = getOrderByExprNodeDesc(selectOutputRR, inputRR, obASTExpr, ref); // 2.2 Convert ExprNode to RexNode - rnd = converter.convert(obExprNDesc); + RexNode orderByExpression = getOrderByExpression(selectOutputRR, inputRR, orderByNode, ref); // 2.3 Determine the index of ob expr in child schema // NOTE: Calcite can not take compound exprs in OB without it being // present in the child (& hence we add a child Project Rel) - if (rnd instanceof RexInputRef) { - fieldIndex = ((RexInputRef) rnd).getIndex(); + if (orderByExpression instanceof RexInputRef) { + fieldIndex = ((RexInputRef) orderByExpression).getIndex(); } else { fieldIndex = srcRelRecordSz + newVCLst.size(); - newVCLst.add(rnd); - vcASTTypePairs.add(new Pair<>(ref, obExprNDesc.getTypeInfo())); + newVCLst.add(orderByExpression); + vcASTTypePairs.add(new Pair<>(ref, TypeConverter.convert(orderByExpression.getType()))); } } // 2.4 Determine the Direction of order by RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; - if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { + if (orderByNode.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { order = RelFieldCollation.Direction.ASCENDING; } RelFieldCollation.NullDirection nullOrder; @@ -4146,16 +4118,16 @@ private OBLogicalPlanGenState beginGenOBLogicalPlan( return new OBLogicalPlanGenState(obInputRel, fieldCollations, selectOutputRR, outputRR, srcRel); } - private ExprNodeDesc getOrderByExprNodeDesc( - RowResolver selectOutputRR, RowResolver inputRR, ASTNode obASTExpr, ASTNode ref) - throws SemanticException { + private RexNode getOrderByExpression( + RowResolver selectOutputRR, RowResolver inputRR, ASTNode orderByNode, ASTNode ref) + throws SemanticException { // first try to get it from select // in case of udtf, selectOutputRR may be null. - ExprNodeDesc obExprNDesc = null; + RexNode orderByExpression = null; if (selectOutputRR != null) { try { - Map astToExprNDescMap = genAllExprNodeDesc(ref, selectOutputRR); - obExprNDesc = astToExprNDescMap.get(ref); + Map astToExprNDescMap = genAllRexNode(ref, selectOutputRR, cluster.getRexBuilder()); + orderByExpression = astToExprNDescMap.get(ref); } catch (SemanticException ex) { // we can tolerate this as this is the previous behavior LOG.debug("Can not find column in " + ref.getText() + ". 
The error msg is " @@ -4163,14 +4135,14 @@ private ExprNodeDesc getOrderByExprNodeDesc( } } // then try to get it from all - if (obExprNDesc == null) { - Map astToExprNDescMap = genAllExprNodeDesc(ref, inputRR); - obExprNDesc = astToExprNDescMap.get(ref); + if (orderByExpression == null) { + Map astToExprNDescMap = genAllRexNode(ref, inputRR, cluster.getRexBuilder()); + orderByExpression = astToExprNDescMap.get(ref); } - if (obExprNDesc == null) { - throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); + if (orderByExpression == null) { + throw new SemanticException("Invalid order by expression: " + orderByNode.toString()); } - return obExprNDesc; + return orderByExpression; } // SELECT a, b FROM t ORDER BY 1 @@ -4249,32 +4221,31 @@ private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticExcept return sortRel; } - private List getPartitionKeys(PartitionSpec ps, RexNodeConverter converter, + private List getPartitionKeys(PartitionSpec ps, RowResolver inputRR) throws SemanticException { - List pKeys = new ArrayList(); + List pKeys = new ArrayList<>(); if (ps != null) { List pExprs = ps.getExpressions(); for (PartitionExpression pExpr : pExprs) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder()); tcCtx.setAllowStatefulFunctions(true); - ExprNodeDesc exp = genExprNodeDesc(pExpr.getExpression(), inputRR, tcCtx); - pKeys.add(converter.convert(exp)); + RexNode exp = genRexNode(pExpr.getExpression(), inputRR, tcCtx); + pKeys.add(exp); } } return pKeys; } - private List getOrderKeys(OrderSpec os, RexNodeConverter converter, + private List getOrderKeys(OrderSpec os, RowResolver inputRR) throws SemanticException { - List oKeys = new ArrayList(); + List oKeys = new ArrayList<>(); if (os != null) { List oExprs = os.getExpressions(); for (OrderExpression oExpr : oExprs) { - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder()); tcCtx.setAllowStatefulFunctions(true); - ExprNodeDesc exp = genExprNodeDesc(oExpr.getExpression(), inputRR, tcCtx); - RexNode ordExp = converter.convert(exp); + RexNode ordExp = genRexNode(oExpr.getExpression(), inputRR, tcCtx); Set flags = new HashSet(); if (oExpr.getOrder() == org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC) { flags.add(SqlKind.DESCENDING); @@ -4360,50 +4331,44 @@ private int getWindowSpecIndx(ASTNode wndAST) { // TODO: do we need to get to child? int wndSpecASTIndx = getWindowSpecIndx(windowProjAst); // 2. Get Hive Aggregate Info - AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, + AggregateInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1, this.relToHiveRR.get(srcRel)); // 3. Get Calcite Return type for Agg Fn - wHiveRetType = hiveAggInfo.m_returnType; - RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.m_returnType, + wHiveRetType = hiveAggInfo.getReturnType(); + RelDataType calciteAggFnRetType = TypeConverter.convert(hiveAggInfo.getReturnType(), this.cluster.getTypeFactory()); // 4. Convert Agg Fn args to Calcite - ImmutableMap posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); - RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), - posMap, 0, false); - Builder calciteAggFnArgsBldr = ImmutableList. builder(); - Builder calciteAggFnArgsTypeBldr = ImmutableList. 
builder(); - for (int i = 0; i < hiveAggInfo.m_aggParams.size(); i++) { - calciteAggFnArgsBldr.add(converter.convert(hiveAggInfo.m_aggParams.get(i))); - calciteAggFnArgsTypeBldr.add(TypeConverter.convert(hiveAggInfo.m_aggParams.get(i) - .getTypeInfo(), this.cluster.getTypeFactory())); + List calciteAggFnArgs = hiveAggInfo.getParameters(); + Builder calciteAggFnArgsTypeBldr = ImmutableList.builder(); + for (int i = 0; i < hiveAggInfo.getParameters().size(); i++) { + calciteAggFnArgsTypeBldr.add(hiveAggInfo.getParameters().get(i).getType()); } - ImmutableList calciteAggFnArgs = calciteAggFnArgsBldr.build(); ImmutableList calciteAggFnArgsType = calciteAggFnArgsTypeBldr.build(); // 5. Get Calcite Agg Fn final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn( - hiveAggInfo.m_udfName, hiveAggInfo.m_distinct, calciteAggFnArgsType, calciteAggFnRetType); + hiveAggInfo.getAggregateName(), hiveAggInfo.isDistinct(), calciteAggFnArgsType, calciteAggFnRetType); // 6. Translate Window spec RowResolver inputRR = relToHiveRR.get(srcRel); WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec(); - List partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR); - List orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR); + List partitionKeys = getPartitionKeys(wndSpec.getPartition(), inputRR); + List orderKeys = getOrderKeys(wndSpec.getOrder(), inputRR); RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getStart()); RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getEnd()); boolean isRows = wndSpec.getWindowFrame().getWindowType() == WindowType.ROWS; w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs, partitionKeys, ImmutableList. copyOf(orderKeys), lowerBound, - upperBound, isRows, true, false, hiveAggInfo.m_distinct); + upperBound, isRows, true, false, hiveAggInfo.isDistinct()); } else { // TODO: Convert to Semantic Exception throw new RuntimeException("Unsupported window Spec"); } - return new Pair(w, wHiveRetType); + return new Pair<>(w, wHiveRetType); } private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet newColumns) @@ -4469,12 +4434,12 @@ private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rw private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch, RelNode srcRel, List windowExpressions) throws CalciteSemanticException { // 1. Build Column Names - Set colNamesSet = new HashSet(); + Set colNamesSet = new HashSet<>(); List cInfoLst = out_rwsch.getRowSchema().getSignature(); - ArrayList columnNames = new ArrayList(); + List columnNames = new ArrayList<>(); Map windowToAlias = null; if (windowExpressions != null ) { - windowToAlias = new HashMap(); + windowToAlias = new HashMap<>(); for (WindowExpressionSpec wes : windowExpressions) { windowToAlias.put(wes.getExpression().toStringTree().toLowerCase(), wes.getAlias()); } @@ -4515,8 +4480,11 @@ private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rw } // 3 Build Calcite Rel Node for project using converted projections & col - // names - HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames); + // names. Fix nullability + HiveRelNode selRel = HiveProject.create( + srcRel, + RexUtil.fixUp(cluster.getRexBuilder(), calciteColLst, RelOptUtil.getFieldTypeList(srcRel.getRowType())), + columnNames); // 4. 
Keep track of colname-to-posmap && RR for new select this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch)); @@ -4577,7 +4545,7 @@ private void setQueryHints(QB qb) throws SemanticException { RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns); srcRel = (selForWindow == null) ? srcRel : selForWindow; - ArrayList col_list = new ArrayList(); + List columnList = new ArrayList<>(); // 1. Get Select Expression List QBParseInfo qbp = getQBParseInfo(qb); @@ -4591,7 +4559,7 @@ private void setQueryHints(QB qb) throws SemanticException { || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty()); // 2.Row resolvers for input, output - RowResolver out_rwsch = new RowResolver(); + RowResolver outputRR = new RowResolver(); Integer pos = Integer.valueOf(0); // TODO: will this also fix windowing? try RowResolver inputRR = this.relToHiveRR.get(srcRel), starRR = inputRR; @@ -4602,7 +4570,6 @@ private void setQueryHints(QB qb) throws SemanticException { // 3. Query Hints // TODO: Handle Query Hints; currently we ignore them - boolean selectStar = false; int posn = 0; boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT); if (hintPresent) { @@ -4635,9 +4602,9 @@ private void setQueryHints(QB qb) throws SemanticException { if (!fi.isNative()) { unparseTranslator.addIdentifierTranslation((ASTNode) expr.getChild(0)); } - if (genericUDTF != null && (selectStar = exprType == HiveParser.TOK_FUNCTIONSTAR)) { - genColListRegex(".*", null, (ASTNode) expr.getChild(0), - col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); + if (genericUDTF != null && exprType == HiveParser.TOK_FUNCTIONSTAR) { + genRexNodeRegex(".*", null, (ASTNode) expr.getChild(0), + columnList, null, inputRR, starRR, pos, outputRR, qb.getAliases(), false); } } } @@ -4727,13 +4694,14 @@ private void setQueryHints(QB qb) throws SemanticException { boolean isSubQuery = genSubQueryRelNode(qb, expr, srcRel, false, subQueryToRelNode); if(isSubQuery) { - ExprNodeDesc subQueryExpr = genExprNodeDesc(expr, relToHiveRR.get(srcRel), - outerRR, subQueryToRelNode, true); - col_list.add(subQueryExpr); - + RexNode subQueryExpr = genRexNode(expr, relToHiveRR.get(srcRel), + outerRR, subQueryToRelNode, true, cluster.getRexBuilder()); + columnList.add(subQueryExpr); ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), - subQueryExpr.getWritableObjectInspector(), tabAlias, false); - if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) { + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + TypeConverter.convert(subQueryExpr.getType())), + tabAlias, false); + if (!outputRR.putWithCheck(tabAlias, colAlias, null, colInfo)) { throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "." + colAlias + " => " + colInfo + " due to duplication, see previous warnings", UnsupportedFeature.Duplicates_in_RR); @@ -4743,10 +4711,9 @@ private void setQueryHints(QB qb) throws SemanticException { // 6.4 Build ExprNode corresponding to colums if (expr.getType() == HiveParser.TOK_ALLCOLREF) { - pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : - getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, - excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); - selectStar = true; + pos = genRexNodeRegex(".*", + expr.getChildCount() == 0 ? 
null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), + expr, columnList, excludedColumns, inputRR, starRR, pos, outputRR, qb.getAliases(), true); } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause && !inputRR.getIsExprResolver() @@ -4755,8 +4722,8 @@ private void setQueryHints(QB qb) throws SemanticException { // In case the expression is a regex COL. // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, col_list, excludedColumns, - inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); + pos = genRexNodeRegex(unescapeIdentifier(expr.getChild(0).getText()), null, + expr, columnList, excludedColumns, inputRR, starRR, pos, outputRR, qb.getAliases(), true); } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0) @@ -4768,11 +4735,11 @@ private void setQueryHints(QB qb) throws SemanticException { // In case the expression is TABLE.COL (col can be regex). // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex( + pos = genRexNodeRegex( unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), - expr, col_list, excludedColumns, inputRR, starRR, pos, - out_rwsch, qb.getAliases(), true); + expr, columnList, excludedColumns, inputRR, starRR, pos, + outputRR, qb.getAliases(), true); } else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI) && !(srcRel instanceof HiveAggregate)) { // Likely a malformed query eg, select hash(distinct c1) from t1; @@ -4780,7 +4747,7 @@ private void setQueryHints(QB qb) throws SemanticException { UnsupportedFeature.Distinct_without_an_aggreggation); } else { // Case when this is an expression - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder()); // We allow stateful functions in the SELECT list (but nowhere else) tcCtx.setAllowStatefulFunctions(true); if (!qbp.getDestToGroupBy().isEmpty()) { @@ -4788,35 +4755,36 @@ private void setQueryHints(QB qb) throws SemanticException { expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr, !cubeRollupGrpSetPresent); } - ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); - String recommended = recommendName(exp, colAlias); - if (recommended != null && out_rwsch.get(null, recommended) == null) { + RexNode expression = genRexNode(expr, inputRR, tcCtx); + + String recommended = recommendName(expression, colAlias, inputRR); + if (recommended != null && outputRR.get(null, recommended) == null) { colAlias = recommended; } - col_list.add(exp); + columnList.add(expression); + TypeInfo typeInfo = expression.isA(SqlKind.LITERAL) ? + TypeConverter.convertLiteralType((RexLiteral) expression) : + TypeConverter.convert(expression.getType()); ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos), - exp.getWritableObjectInspector(), tabAlias, false); - colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? 
((ExprNodeColumnDesc) exp) - .isSkewedCol() : false); - out_rwsch.put(tabAlias, colAlias, colInfo); + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo), + tabAlias, false); + outputRR.put(tabAlias, colAlias, colInfo); pos = Integer.valueOf(pos.intValue() + 1); } } } - selectStar = selectStar && exprList.getChildCount() == posn + 1; - // 7. Convert Hive projections to Calcite - List calciteColLst = new ArrayList(); - - RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(), - outerNameToPosMap, buildHiveColNameToInputPosMap(col_list, inputRR), relToHiveRR.get(srcRel), - outerRR, HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES), - 0, false, subqueryId); - for (ExprNodeDesc colExpr : col_list) { - calciteColLst.add(rexNodeConv.convert(colExpr)); - } + // 7. For correlated queries + ImmutableMap hiveColNameCalcitePosMap = + buildHiveColNameToInputPosMap(columnList, inputRR); + CorrelationConverter cc = new CorrelationConverter( + new InputContext(srcRel.getRowType(), hiveColNameCalcitePosMap, relToHiveRR.get(srcRel)), + outerNameToPosMap, outerRR, subqueryId); + columnList = columnList.stream() + .map(cc::apply) + .collect(Collectors.toList()); // 8. Build Calcite Rel RelNode outputRel = null; @@ -4828,7 +4796,7 @@ private void setQueryHints(QB qb) throws SemanticException { // In OP return path, we need to generate a SEL and then a UDTF // following old semantic analyzer. outputRel = genUDTFPlan(genericUDTF, genericUDTFName, udtfTableAlias, udtfColAliases, qb, - calciteColLst, out_rwsch, srcRel); + columnList, outputRR, srcRel); } else { String dest = qbp.getClauseNames().iterator().next(); ASTNode obAST = qbp.getOrderByForClause(dest); @@ -4863,27 +4831,27 @@ public RexNode apply(RelDataTypeField input) { return new RexInputRef(input.getIndex(), input.getType()); } }); - originalRR = out_rwsch.duplicate(); + originalRR = outputRR.duplicate(); for (int i = 0; i < inputRR.getColumnInfos().size(); i++) { ColumnInfo colInfo = new ColumnInfo(inputRR.getColumnInfos().get(i)); - String internalName = SemanticAnalyzer.getColumnInternalName(out_rwsch.getColumnInfos() + String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos() .size() + i); colInfo.setInternalName(internalName); // if there is any confict, then we do not generate it in the new select // otherwise, we add it into the calciteColLst and generate the new select - if (!out_rwsch.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName, + if (!outputRR.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName, colInfo)) { LOG.trace("Column already present in RR. skipping."); } else { - calciteColLst.add(originalInputRefs.get(i)); + columnList.add(originalInputRefs.get(i)); } } - outputRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); + outputRel = genSelectRelNode(columnList, outputRR, srcRel); // outputRel is the generated augmented select with extra unselected // columns, and originalRR is the original generated select return new Pair(outputRel, originalRR); } else { - outputRel = genSelectRelNode(calciteColLst, out_rwsch, srcRel); + outputRel = genSelectRelNode(columnList, outputRR, srcRel); } } // 9. 
Handle select distinct as GBY if there exist windowing functions @@ -4892,8 +4860,8 @@ public RexNode apply(RelDataTypeField input) { outputRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), outputRel, groupSet, null, new ArrayList()); RowResolver groupByOutputRowResolver = new RowResolver(); - for (int i = 0; i < out_rwsch.getColumnInfos().size(); i++) { - ColumnInfo colInfo = out_rwsch.getColumnInfos().get(i); + for (int i = 0; i < outputRR.getColumnInfos().size(); i++) { + ColumnInfo colInfo = outputRR.getColumnInfos().get(i); ColumnInfo newColInfo = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()); groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo); @@ -4906,6 +4874,19 @@ public RexNode apply(RelDataTypeField input) { return new Pair(outputRel, null); } + Integer genRexNodeRegex(String colRegex, String tabAlias, ASTNode sel, + List exprList, Set excludeCols, RowResolver input, + RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, + boolean ensureUniqueCols) throws SemanticException { + List> colList = new ArrayList<>(); + Integer i = genColListRegex(colRegex, tabAlias, sel, + colList, excludeCols, input, colSrcRR, pos, output, aliases, ensureUniqueCols); + for (org.apache.commons.lang3.tuple.Pair p : colList) { + exprList.add(RexNodeTypeCheck.toExprNode(p.getLeft(), p.getRight(), 0, cluster.getRexBuilder())); + } + return i; + } + private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, String outputTableAlias, ArrayList colAliases, QB qb, List selectColLst, RowResolver selectRR, RelNode input) throws SemanticException { @@ -4929,29 +4910,18 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases); - // Use the RowResolver from the input operator to generate a input - // ObjectInspector that can be used to initialize the UDTF. 
Then, the - // resulting output object inspector can be used to make the RowResolver - // for the UDTF operator - List inputCols = selectRR.getColumnInfos(); - - // Create the object inspector for the input columns and initialize the + // Create the return type info for the input columns and initialize the // UDTF - List colNames = new ArrayList(); - ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()]; - for (int i = 0; i < inputCols.size(); i++) { - colNames.add(inputCols.get(i).getInternalName()); - colOIs[i] = inputCols.get(i).getObjectInspector(); - } - StandardStructObjectInspector rowOI = ObjectInspectorFactory - .getStandardStructObjectInspector(colNames, Arrays.asList(colOIs)); - StructObjectInspector outputOI = genericUDTF.initialize(rowOI); + StructTypeInfo type = (StructTypeInfo) TypeConverter.convert( + functionHelper.getReturnType( + functionHelper.getFunctionInfo(genericUDTFName), + selectColLst)); - int numUdtfCols = outputOI.getAllStructFieldRefs().size(); + int numUdtfCols = type.getAllStructFieldNames().size(); if (colAliases.isEmpty()) { // user did not specfied alias names, infer names from outputOI - for (StructField field : outputOI.getAllStructFieldRefs()) { - colAliases.add(field.getFieldName()); + for (String fieldName : type.getAllStructFieldNames()) { + colAliases.add(fieldName); } } // Make sure that the number of column aliases in the AS clause matches @@ -4966,7 +4936,9 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str List udtfCols = new ArrayList(); Iterator colAliasesIter = colAliases.iterator(); - for (StructField sf : outputOI.getAllStructFieldRefs()) { + for (int i = 0; i < type.getAllStructFieldTypeInfos().size(); i++) { + final String fieldName = type.getAllStructFieldNames().get(i); + final TypeInfo fieldTypeInfo = type.getAllStructFieldTypeInfos().get(i); String colAlias = colAliasesIter.next(); assert (colAlias != null); @@ -4974,23 +4946,21 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str // Since the UDTF operator feeds into a LVJ operator that will rename // all the internal names, we can just use field name from the UDTF's OI // as the internal name - ColumnInfo col = new ColumnInfo(sf.getFieldName(), - TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), - outputTableAlias, false); + ColumnInfo col = new ColumnInfo(fieldName, fieldTypeInfo, outputTableAlias, false); udtfCols.add(col); } // Create the row resolver for this operator from the output columns - RowResolver out_rwsch = new RowResolver(); + RowResolver outputRR = new RowResolver(); for (int i = 0; i < udtfCols.size(); i++) { - out_rwsch.put(outputTableAlias, colAliases.get(i), udtfCols.get(i)); + outputRR.put(outputTableAlias, colAliases.get(i), udtfCols.get(i)); } // Add the UDTFOperator to the operator DAG RelTraitSet traitSet = TraitsUtil.getDefaultTraitSet(cluster); // Build row type from field - RelDataType retType = TypeConverter.getType(cluster, out_rwsch, null); + RelDataType retType = TypeConverter.getType(cluster, outputRR, null); Builder argTypeBldr = ImmutableList. 
builder(); @@ -5013,8 +4983,8 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str RelNode udtf = HiveTableFunctionScan.create(cluster, traitSet, list, rexNode, null, retType, null); // Add new rel & its RR to the maps - relToHiveColNameCalcitePosMap.put(udtf, buildHiveToCalciteColumnMap(out_rwsch)); - relToHiveRR.put(udtf, out_rwsch); + relToHiveColNameCalcitePosMap.put(udtf, buildHiveToCalciteColumnMap(outputRR)); + relToHiveRR.put(udtf, outputRR); return udtf; } @@ -5291,18 +5261,15 @@ public Object post(Object t) { } private ImmutableMap buildHiveColNameToInputPosMap( - List col_list, RowResolver inputRR) { - // Build a map of Hive column Names (ExprNodeColumnDesc Name) - // to the positions of those projections in the input - Multimap hashCodeTocolumnDescMap = ArrayListMultimap.create(); - ExprNodeDescUtils.getExprNodeColumnDesc(col_list, hashCodeTocolumnDescMap); - ImmutableMap.Builder hiveColNameToInputPosMapBuilder = new ImmutableMap.Builder(); - String exprNodecolName; - for (ExprNodeColumnDesc exprDesc : hashCodeTocolumnDescMap.values()) { - exprNodecolName = exprDesc.getColumn(); - hiveColNameToInputPosMapBuilder.put(exprNodecolName, inputRR.getPosition(exprNodecolName)); + List columnList, RowResolver inputRR) { + final ImmutableBitSet refs = + RelOptUtil.InputFinder.bits(columnList, null); + ImmutableMap.Builder hiveColNameToInputPosMapBuilder = + new ImmutableMap.Builder<>(); + for (int ref : refs) { + hiveColNameToInputPosMapBuilder.put( + inputRR.getColumnInfos().get(ref).getInternalName(), ref); } - return hiveColNameToInputPosMapBuilder.build(); } @@ -5368,6 +5335,216 @@ protected Table getTableObjectByName(String tabName, boolean throwException) thr return tabNameToTabObject.get(fullyQualName); } + RexNode genRexNode(ASTNode expr, RowResolver input, + RowResolver outerRR, Map subqueryToRelNode, + boolean useCaching, RexBuilder rexBuilder) throws SemanticException { + TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder, useCaching, false); + tcCtx.setOuterRR(outerRR); + tcCtx.setSubqueryToRelNode(subqueryToRelNode); + return genRexNode(expr, input, tcCtx); + } + + /** + * Generates a Calcite {@link RexNode} for the expression with TypeCheckCtx. + */ + RexNode genRexNode(ASTNode expr, RowResolver input, RexBuilder rexBuilder) + throws SemanticException { + // Since the user didn't supply a customized type-checking context, + // use default settings. + return genRexNode(expr, input, true, false, rexBuilder); + } + + RexNode genRexNode(ASTNode expr, RowResolver input, boolean useCaching, + boolean foldExpr, RexBuilder rexBuilder) throws SemanticException { + TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder, useCaching, foldExpr); + return genRexNode(expr, input, tcCtx); + } + + /** + * Generates a Calcite {@link RexNode} for the expression and children of it + * with default TypeCheckCtx. + */ + Map genAllRexNode(ASTNode expr, RowResolver input, RexBuilder rexBuilder) + throws SemanticException { + TypeCheckCtx tcCtx = new TypeCheckCtx(input, rexBuilder); + return genAllRexNode(expr, input, tcCtx); + } + + /** + * Returns a Calcite {@link RexNode} for the expression. + * If it is evaluated already in previous operator, it can be retrieved from cache. 
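+ * <p>A minimal usage sketch (illustrative; {@code exprAst} stands for any expression ASTNode,
+ * the other names are taken from this patch and mirror the partition/order key translation):
+ * <pre>
+ *   TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, cluster.getRexBuilder());
+ *   RexNode rex = genRexNode(exprAst, inputRR, tcCtx);
+ * </pre>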
+ */ + RexNode genRexNode(ASTNode expr, RowResolver input, + TypeCheckCtx tcCtx) throws SemanticException { + RexNode cached = null; + if (tcCtx.isUseCaching()) { + cached = getRexNodeCached(expr, input, tcCtx); + } + if (cached == null) { + Map allExprs = genAllRexNode(expr, input, tcCtx); + return allExprs.get(expr); + } + return cached; + } + + /** + * Find RexNode for the expression cached in the RowResolver. Returns null if not exists. + */ + private RexNode getRexNodeCached(ASTNode node, RowResolver input, + TypeCheckCtx tcCtx) throws SemanticException { + ColumnInfo colInfo = input.getExpression(node); + if (colInfo != null) { + ASTNode source = input.getExpressionSource(node); + if (source != null) { + unparseTranslator.addCopyTranslation(node, source); + } + return RexNodeTypeCheck.toExprNode(colInfo, input, 0, tcCtx.getRexBuilder()); + } + return null; + } + + /** + * Generates all of the Calcite {@link RexNode}s for the expression and children of it + * passed in the arguments. This function uses the row resolver and the metadata information + * that are passed as arguments to resolve the column names to internal names. + * + * @param expr + * The expression + * @param input + * The row resolver + * @param tcCtx + * Customized type-checking context + * @return expression to exprNodeDesc mapping + * @throws SemanticException Failed to evaluate expression + */ + Map genAllRexNode(ASTNode expr, RowResolver input, + TypeCheckCtx tcCtx) throws SemanticException { + // Create the walker and the rules dispatcher. + tcCtx.setUnparseTranslator(unparseTranslator); + + Map nodeOutputs = + RexNodeTypeCheck.genExprNode(expr, tcCtx); + RexNode desc = nodeOutputs.get(expr); + if (desc == null) { + String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr + .getChild(0).getText()); + ColumnInfo colInfo = input.get(null, tableOrCol); + String errMsg; + if (colInfo == null && input.getIsExprResolver()){ + errMsg = ASTErrorUtils.getMsg( + ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(), expr); + } else { + errMsg = tcCtx.getError(); + } + throw new SemanticException(Optional.ofNullable(errMsg).orElse("Error in parsing ")); + } + if (desc instanceof HiveRexExprList) { + throw new SemanticException("TOK_ALLCOLREF is not supported in current context"); + } + + if (!unparseTranslator.isEnabled()) { + // Not creating a view, so no need to track view expansions. + return nodeOutputs; + } + + List fieldDescList = new ArrayList<>(); + + for (Map.Entry entry : nodeOutputs.entrySet()) { + if (!(entry.getValue() instanceof RexInputRef)) { + // we need to translate the RexFieldAccess too, e.g., identifiers in + // struct<>. + if (entry.getValue() instanceof RexFieldAccess) { + fieldDescList.add(entry.getKey()); + } + continue; + } + ASTNode node = entry.getKey(); + RexInputRef columnDesc = (RexInputRef) entry.getValue(); + int index = columnDesc.getIndex(); + String[] tmp; + if (index < input.getColumnInfos().size()) { + ColumnInfo columnInfo = input.getColumnInfos().get(index); + if (columnInfo.getTabAlias() == null + || columnInfo.getTabAlias().length() == 0) { + // These aren't real column refs; instead, they are special + // internal expressions used in the representation of aggregation. + continue; + } + tmp = input.reverseLookup(columnInfo.getInternalName()); + } else { + // in subquery case, tmp may be from outside. 
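+ // e.g., for a correlated predicate such as "t2.y = t1.x" inside a subquery over t2,
+ // the reference to t1.x carries an index past the inner schema, so it is resolved
+ // against the outer row resolver using the inner column count as the offset.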
+ ColumnInfo columnInfo = tcCtx.getOuterRR().getColumnInfos().get( + index - input.getColumnInfos().size()); + if (columnInfo.getTabAlias() == null + || columnInfo.getTabAlias().length() == 0) { + continue; + } + tmp = tcCtx.getOuterRR().reverseLookup(columnInfo.getInternalName()); + } + StringBuilder replacementText = new StringBuilder(); + replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf)); + replacementText.append("."); + replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf)); + unparseTranslator.addTranslation(node, replacementText.toString()); + } + + for (ASTNode node : fieldDescList) { + Map map = translateFieldDesc(node); + for (Entry entry : map.entrySet()) { + unparseTranslator.addTranslation(entry.getKey(), entry.getValue()); + } + } + + return nodeOutputs; + } + + private String recommendName(RexNode exp, String colAlias, RowResolver rowResolver) { + if (!colAlias.startsWith(autogenColAliasPrfxLbl)) { + return null; + } + String column = recommendInputName(exp, rowResolver); + if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) { + return column; + } + return null; + } + + /** + * Recommend name for the expression + */ + private static String recommendInputName(RexNode desc, RowResolver rowResolver) { + Integer pos = null; + if (desc instanceof RexInputRef) { + pos = ((RexInputRef) desc).getIndex(); + } + if (desc.isA(SqlKind.CAST)) { + RexNode input = ((RexCall) desc).operands.get(0); + if (input instanceof RexInputRef) { + pos = ((RexInputRef) input).getIndex(); + } + } + return pos != null ? + rowResolver.getColumnInfos().get(pos).getInternalName() : + null; + } + + + /** + * Contains information useful to decorrelate queries. + */ + protected static class InputContext { + protected final RelDataType inputRowType; + protected final ImmutableBiMap positionToColumnName; + protected final RowResolver inputRowResolver; + + protected InputContext(RelDataType inputRowType, ImmutableMap columnNameToPosition, + RowResolver inputRowResolver) { + this.inputRowType = inputRowType; + this.positionToColumnName = ImmutableBiMap.copyOf(columnNameToPosition).inverse(); + this.inputRowResolver = inputRowResolver.duplicate(); + } + } + /** * This method can be called at startup time to pre-register all the * additional Hive classes (compared to Calcite core classes) that may diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index c87f2d2292..8b37161c52 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -357,7 +357,7 @@ private final GlobalLimitCtx globalLimitCtx; // prefix for column names auto generated by hive - private final String autogenColAliasPrfxLbl; + protected final String autogenColAliasPrfxLbl; private final boolean autogenColAliasPrfxIncludeFuncName; // Keep track of view alias to read entity corresponding to the view @@ -3598,12 +3598,25 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, return output; } + Integer genExprNodeDescRegex(String colRegex, String tabAlias, ASTNode sel, + List exprList, Set excludeCols, RowResolver input, + RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, + boolean ensureUniqueCols) throws SemanticException { + List> colList = new ArrayList<>(); + Integer i = genColListRegex(colRegex, tabAlias, sel, + colList, excludeCols, input, colSrcRR, pos, output, aliases, 
ensureUniqueCols); + for (Pair p : colList) { + exprList.add(ExprNodeTypeCheck.toExprNode(p.getLeft(), p.getRight())); + } + return i; + } + @SuppressWarnings("nls") - // TODO: make aliases unique, otherwise needless rewriting takes place + // TODO: make aliases unique, otherwise needless rewriting takes place Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, - List colList, Set excludeCols, RowResolver input, - RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, - boolean ensureUniqueCols) throws SemanticException { + List> colList, Set excludeCols, RowResolver input, + RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, + boolean ensureUniqueCols) throws SemanticException { if (colSrcRR == null) { colSrcRR = input; @@ -3671,9 +3684,7 @@ Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, } ColumnInfo oColInfo = inputColsProcessed.get(colInfo); if (oColInfo == null) { - ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name, - colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol()); - colList.add(expr); + colList.add(Pair.of(colInfo, colSrcRR)); oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()); inputColsProcessed.put(colInfo, oColInfo); @@ -3760,9 +3771,7 @@ Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, ColumnInfo oColInfo = inputColsProcessed.get(colInfo); if (oColInfo == null) { - ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name, - colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol()); - colList.add(expr); + colList.add(Pair.of(colInfo, input)); oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()); inputColsProcessed.put(colInfo, oColInfo); @@ -4471,7 +4480,7 @@ boolean isRegex(String pattern, HiveConf conf) { .getChild(0)); } if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) { - genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0), + genExprNodeDescRegex(".*", null, (ASTNode) udtfExpr.getChild(0), colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); } } @@ -4579,7 +4588,7 @@ boolean isRegex(String pattern, HiveConf conf) { // The real expression if (expr.getType() == HiveParser.TOK_ALLCOLREF) { int initPos = pos; - pos = genColListRegex(".*", expr.getChildCount() == 0 ? null + pos = genExprNodeDescRegex(".*", expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); if (unparseTranslator.isEnabled()) { @@ -4592,7 +4601,7 @@ boolean isRegex(String pattern, HiveConf conf) { // In case the expression is a regex COL. // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), + pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL @@ -4603,7 +4612,7 @@ boolean isRegex(String pattern, HiveConf conf) { // In case the expression is TABLE.COL (col can be regex). 
// This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()), + pos = genExprNodeDescRegex(unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), expr, colList, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false); } else { @@ -4844,15 +4853,22 @@ boolean autogenColAliasPrfxIncludeFuncName() { * for each GroupBy aggregation. */ public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, - List aggParameters, ASTNode aggTree, - boolean isDistinct, boolean isAllColumns) + List aggParameters, ASTNode aggTree, + boolean isDistinct, boolean isAllColumns) + throws SemanticException { + return getGenericUDAFEvaluator2(aggName, getWritableObjectInspector(aggParameters), + aggTree, isDistinct, isAllColumns); + } + + public static GenericUDAFEvaluator getGenericUDAFEvaluator2(String aggName, + List aggParameterOIs, ASTNode aggTree, + boolean isDistinct, boolean isAllColumns) throws SemanticException { - List originalParameterTypeInfos = getWritableObjectInspector(aggParameters); GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator( - aggName, originalParameterTypeInfos, isDistinct, isAllColumns); + aggName, aggParameterOIs, isDistinct, isAllColumns); if (null == result) { String reason = "Looking for UDAF Evaluator\"" + aggName - + "\" with parameters " + originalParameterTypeInfos; + + "\" with parameters " + aggParameterOIs; throw new SemanticException(ASTErrorUtils.getMsg( ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(), (ASTNode) aggTree.getChild(0), reason)); @@ -4872,7 +4888,16 @@ public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, * when the UDAF is not found or has problems. 
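* <p>For example (an illustrative call), {@code getGenericUDAFInfo(eval, Mode.COMPLETE, aggParameters)}
* now delegates to {@code getGenericUDAFInfo2} with the parameters' writable object inspectors
* and records the original parameters as {@code convertedParameters}.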
*/ public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, - GenericUDAFEvaluator.Mode emode, List aggParameters) + GenericUDAFEvaluator.Mode emode, List aggParameters) + throws SemanticException { + GenericUDAFInfo udafInfo = getGenericUDAFInfo2( + evaluator, emode, getWritableObjectInspector(aggParameters)); + udafInfo.convertedParameters = aggParameters; + return udafInfo; + } + + public static GenericUDAFInfo getGenericUDAFInfo2(GenericUDAFEvaluator evaluator, + GenericUDAFEvaluator.Mode emode, List aggOIs) throws SemanticException { GenericUDAFInfo r = new GenericUDAFInfo(); @@ -4883,7 +4908,6 @@ public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, // set r.returnType ObjectInspector returnOI = null; try { - List aggOIs = getWritableObjectInspector(aggParameters); ObjectInspector[] aggOIArray = new ObjectInspector[aggOIs.size()]; for (int ii = 0; ii < aggOIs.size(); ++ii) { aggOIArray[ii] = aggOIs.get(ii); @@ -4893,9 +4917,6 @@ public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, } catch (HiveException e) { throw new SemanticException(e); } - // set r.convertedParameters - // TODO: type conversion - r.convertedParameters = aggParameters; return r; } @@ -7292,7 +7313,7 @@ private ExprNodeDesc getNotNullConstraintExpr(Table targetTable, Operator input, continue; } if (nullConstraintBitSet.indexOf(constraintIdx) != -1) { - ExprNodeDesc currExpr = ExprNodeTypeCheck.toExprNodeDesc(colInfos.get(colExprIdx)); + ExprNodeDesc currExpr = ExprNodeTypeCheck.toExprNode(colInfos.get(colExprIdx), null); ExprNodeDesc isNotNullUDF = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() .getFuncExprNodeDesc("isnotnull", currExpr); if (currUDF != null) { @@ -13035,16 +13056,6 @@ public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) return genExprNodeDesc(expr, input, true, false); } - ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, - RowResolver outerRR, Map subqueryToRelNode, - boolean useCaching) throws SemanticException { - - TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, false); - tcCtx.setOuterRR(outerRR); - tcCtx.setSubqueryToRelNode(subqueryToRelNode); - return genExprNodeDesc(expr, input, tcCtx); - } - ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, boolean useCaching, boolean foldExpr) throws SemanticException { TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, foldExpr); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprFactory.java index afc2bbbf66..0be167e45d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprFactory.java @@ -20,10 +20,11 @@ import java.math.BigDecimal; import java.time.ZoneId; import java.util.List; +import org.apache.calcite.rex.RexNode; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.SubqueryType; @@ -33,8 +34,7 @@ import org.apache.hive.common.util.DateUtils; /** - * Generic expressions factory. Currently, the only implementation produces - * Hive {@link ExprNodeDesc}. 
+ * Generic expressions factory. */ public abstract class ExprFactory { @@ -50,7 +50,8 @@ * Generates an expression from the input column. This may not necessarily * be a column expression, e.g., if the column is a constant. */ - protected abstract T toExpr(ColumnInfo colInfo); + protected abstract T toExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws SemanticException; /* FIELD REFERENCES */ /** @@ -61,12 +62,19 @@ /** * Creates column expression. */ - protected abstract T createColumnRefExpr(ColumnInfo colInfo); + protected abstract T createColumnRefExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws SemanticException; + + /** + * Creates column expression. + */ + protected abstract T createColumnRefExpr(ColumnInfo colInfo, List rowResolverList) + throws SemanticException; /** * Returns column name referenced by a column expression. */ - protected abstract String getColumnName(T expr); + protected abstract String getColumnName(T expr, RowResolver rowResolver); /* CONSTANT EXPRESSIONS */ /** @@ -130,7 +138,7 @@ protected boolean isAllConstants(List exprs) { /** * Creates a double constant expression from input value. */ - protected abstract T createDoubleConstantExpr(String value); + protected abstract T createDoubleConstantExpr(String value) throws SemanticException; /** * Creates a decimal constant expression from input value. @@ -205,7 +213,7 @@ protected boolean isAllConstants(List exprs) { * Default generator for constant expression when type cannot be inferred * from input query. */ - protected T createConstantExpr(String value) { + protected T createConstantExpr(String value) throws SemanticException { // The expression can be any one of Double, Long and Integer. We // try to parse the expression in that order to ensure that the // most specific type is used for conversion. @@ -227,10 +235,17 @@ protected T createConstantExpr(String value) { return result != null ? result : result2; } + /** + * Creates a struct with given type. + */ + protected abstract T createStructExpr(TypeInfo typeInfo, List operands) + throws SemanticException; + /** * Creates a constant expression from input value with given type. */ - protected abstract T createConstantExpr(TypeInfo typeInfo, Object constantValue); + protected abstract T createConstantExpr(TypeInfo typeInfo, Object constantValue) + throws SemanticException; /** * Adjust type of constant value based on input type, e.g., adjust precision and scale @@ -249,12 +264,18 @@ protected abstract Object interpretConstantAsPrimitive(PrimitiveTypeInfo targetT */ protected abstract Object getConstantValue(T expr); + /** + * Returns value stored in a constant expression as String. + */ + protected abstract String getConstantValueAsString(T expr); + /* METHODS FOR NESTED FIELD REFERENCES CREATION */ /** * Creates a reference to a nested field. */ protected abstract T createNestedColumnRefExpr( - TypeInfo typeInfo, T expr, String fieldName, Boolean isList); + TypeInfo typeInfo, T expr, String fieldName, Boolean isList) + throws SemanticException; /* FUNCTIONS */ /** @@ -266,13 +287,13 @@ protected abstract T createNestedColumnRefExpr( * Creates function call expression. */ protected abstract T createFuncCallExpr(TypeInfo typeInfo, GenericUDF genericUDF, - List inputs); + List inputs) throws SemanticException; /** * Creates function call expression. 
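* <p>For instance, an implementation might build {@code a AND b} as
* {@code createFuncCallExpr(new GenericUDFOPAnd(), "and", Arrays.asList(a, b))}
* (an illustrative call; the operands come from earlier factory calls).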
*/ protected abstract T createFuncCallExpr(GenericUDF genericUDF, String funcText, - List inputs) throws UDFArgumentException; + List inputs) throws SemanticException; /** * Returns whether the input expression is an OR function call. @@ -289,6 +310,11 @@ protected abstract T createFuncCallExpr(GenericUDF genericUDF, String funcText, */ protected abstract boolean isPOSITIVEFuncCallExpr(T expr); + /** + * Returns whether the input expression is a NEGATIVE function call. + */ + protected abstract boolean isNEGATIVEFuncCallExpr(T expr); + /** * Returns whether the input expression is a STRUCT function call. */ @@ -310,7 +336,7 @@ protected abstract T createFuncCallExpr(GenericUDF genericUDF, String funcText, * Creates subquery expression. */ protected abstract T createSubqueryExpr(TypeCheckCtx ctx, ASTNode subqueryOp, SubqueryType subqueryType, - Object[] inputs) throws CalciteSubquerySemanticException; + Object[] inputs) throws SemanticException; /* LIST OF EXPRESSIONS */ /** @@ -324,12 +350,10 @@ protected abstract T createSubqueryExpr(TypeCheckCtx ctx, ASTNode subqueryOp, Su protected abstract T createExprsListExpr(); /** - * Adds expression to list of expressions and returns resulting - * list. - * If column list is mutable, it will not create a copy - * of the input list. + * Adds expression to list of expressions (list needs to be + * mutable). */ - protected abstract T addExprToExprsList(T columnList, T expr); + protected abstract void addExprToExprsList(T columnList, T expr); /* TYPE SYSTEM */ /** @@ -337,13 +361,18 @@ protected abstract T createSubqueryExpr(TypeCheckCtx ctx, ASTNode subqueryOp, Su */ protected abstract TypeInfo getTypeInfo(T expr); + /** + * Returns the list of types in the input struct expression. + */ + protected abstract List getStructTypeInfoList(T expr); + /** * Changes the type of the input expression to the input type and * returns resulting expression. * If the input expression is mutable, it will not create a copy * of the expression. */ - protected abstract T setTypeInfo(T expr, TypeInfo type); + protected abstract T setTypeInfo(T expr, TypeInfo type) throws SemanticException; /* MISC */ /** @@ -358,4 +387,9 @@ protected abstract T createSubqueryExpr(TypeCheckCtx ctx, ASTNode subqueryOp, Su */ protected abstract List getExprChildren(T expr); + /** + * Returns the list of names in the input struct expression. 
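+ * E.g., for an expression of type {@code struct<name:string,age:int>} this
+ * returns {@code [name, age]}.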
+ */ + protected abstract List getStructNameList(T expr); + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java index 16d354667b..49bae0e742 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeDescExprFactory.java @@ -36,12 +36,14 @@ import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; @@ -53,6 +55,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc; import org.apache.hadoop.hive.ql.plan.SubqueryType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; @@ -67,6 +70,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -94,7 +98,8 @@ protected boolean isExprInstance(Object o) { * {@inheritDoc} */ @Override - protected ExprNodeDesc toExpr(ColumnInfo colInfo) { + protected ExprNodeDesc toExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws SemanticException { ObjectInspector inspector = colInfo.getObjectInspector(); if (inspector instanceof ConstantObjectInspector && inspector instanceof PrimitiveObjectInspector) { return toPrimitiveConstDesc(colInfo, inspector); @@ -188,7 +193,15 @@ private static ExprNodeConstantDesc toStructConstDesc(ColumnInfo colInfo, Object * {@inheritDoc} */ @Override - protected ExprNodeColumnDesc createColumnRefExpr(ColumnInfo colInfo) { + protected ExprNodeColumnDesc createColumnRefExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) { + return new ExprNodeColumnDesc(colInfo); + } + + /** + * {@inheritDoc} + */ + @Override + protected ExprNodeColumnDesc createColumnRefExpr(ColumnInfo colInfo, List rowResolverList) { return new ExprNodeColumnDesc(colInfo); } @@ -345,7 +358,7 @@ protected Object interpretConstantAsPrimitive(PrimitiveTypeInfo targetType, Obje String constTypeInfoName = sourceType.getTypeName(); if (constTypeInfoName.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { // because a comparison against a "string" will happen in "string" type. 
- // to avoid unintnetional comparisions in "string" + // to avoid unintentional comparisons in "string" // constants which are representing char/varchar values must be converted to the // appropriate type. if (targetType instanceof CharTypeInfo) { @@ -493,6 +506,19 @@ protected ExprNodeConstantDesc createIntervalSecondConstantExpr(String value) { bdNanos.multiply(NANOS_PER_SEC_BD).intValue())); } + /** + * {@inheritDoc} + */ + @Override + protected ExprNodeDesc createStructExpr(TypeInfo typeInfo, List operands) + throws SemanticException { + assert typeInfo instanceof StructTypeInfo; + return ExprNodeGenericFuncDesc.newInstance( + new GenericUDFStruct(), + GenericUDFStruct.class.getAnnotation(Description.class).name(), + operands); + } + /** * {@inheritDoc} */ @@ -540,10 +566,9 @@ protected ExprNodeColumnListDesc createExprsListExpr() { * {@inheritDoc} */ @Override - protected ExprNodeColumnListDesc addExprToExprsList(ExprNodeDesc columnList, ExprNodeDesc expr) { + protected void addExprToExprsList(ExprNodeDesc columnList, ExprNodeDesc expr) { ExprNodeColumnListDesc l = (ExprNodeColumnListDesc) columnList; l.addColumn(expr); - return l; } /** @@ -570,6 +595,14 @@ protected Object getConstantValue(ExprNodeDesc expr) { return ((ExprNodeConstantDesc) expr).getValue(); } + /** + * {@inheritDoc} + */ + @Override + protected String getConstantValueAsString(ExprNodeDesc expr) { + return ((ExprNodeConstantDesc) expr).getValue().toString(); + } + /** * {@inheritDoc} */ @@ -582,7 +615,7 @@ protected boolean isColumnRefExpr(Object o) { * {@inheritDoc} */ @Override - protected String getColumnName(ExprNodeDesc expr) { + protected String getColumnName(ExprNodeDesc expr, RowResolver rowResolver) { return ((ExprNodeColumnDesc) expr).getColumn(); } @@ -610,6 +643,24 @@ protected TypeInfo getTypeInfo(ExprNodeDesc expr) { return expr.getTypeInfo(); } + /** + * {@inheritDoc} + */ + @Override + protected List getStructTypeInfoList(ExprNodeDesc expr) { + StructTypeInfo structTypeInfo = (StructTypeInfo) expr.getTypeInfo(); + return structTypeInfo.getAllStructFieldTypeInfos(); + } + + /** + * {@inheritDoc} + */ + @Override + protected List getStructNameList(ExprNodeDesc expr) { + StructTypeInfo structTypeInfo = (StructTypeInfo) expr.getTypeInfo(); + return structTypeInfo.getAllStructFieldNames(); + } + /** * {@inheritDoc} */ @@ -642,6 +693,14 @@ protected boolean isPOSITIVEFuncCallExpr(ExprNodeDesc expr) { return FunctionRegistry.isOpPositive(expr); } + /** + * {@inheritDoc} + */ + @Override + protected boolean isNEGATIVEFuncCallExpr(ExprNodeDesc expr) { + return FunctionRegistry.isOpNegative(expr); + } + /** * {@inheritDoc} */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java index 623b280437..3e3d331412 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/ExprNodeTypeCheck.java @@ -21,6 +21,7 @@ import java.util.Map; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -68,8 +69,10 @@ private ExprNodeTypeCheck() { /** * Transforms column information into the corresponding Hive ExprNode. 
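* For example, {@code ExprNodeTypeCheck.toExprNode(colInfo, inputRR)} yields the column
* (or constant) expression for {@code colInfo}; callers in this patch pass {@code null}
* for the resolver when positional information is not needed.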
*/ - public static ExprNodeDesc toExprNodeDesc(ColumnInfo columnInfo) { + public static ExprNodeDesc toExprNode(ColumnInfo columnInfo, RowResolver rowResolver) + throws SemanticException { ExprNodeDescExprFactory factory = new ExprNodeDescExprFactory(); - return factory.toExpr(columnInfo); + return factory.toExpr(columnInfo, rowResolver, 0); } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/FunctionHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/FunctionHelper.java new file mode 100644 index 0000000000..944be83432 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/FunctionHelper.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse.type; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * Interface to handle function information while generating + * Calcite {@link RexNode}. + */ +public interface FunctionHelper { + + /** + * Returns function information based on function text. + */ + FunctionInfo getFunctionInfo(String functionText) throws SemanticException; + + /** + * Given function information and its inputs, it returns + * the type of the output of the function. + */ + RelDataType getReturnType(FunctionInfo functionInfo, List inputs) + throws SemanticException; + + /** + * Given function information, the inputs to that function, and the + * expected return type, it will return the list of inputs with any + * necessary adjustments, e.g., casting of expressions. + */ + List convertInputs(FunctionInfo functionInfo, List inputs, + RelDataType returnType) + throws SemanticException; + + /** + * Given function information and text, inputs to a function, and the + * expected return type, it will return an expression node containing + * the function call. + */ + RexNode getExpression(String functionText, FunctionInfo functionInfo, + List inputs, RelDataType returnType) + throws SemanticException; + + /** + * Returns aggregation information based on given parameters. + */ + AggregateInfo getAggregateFunctionInfo(boolean isDistinct, boolean isAllColumns, + String aggregateName, List aggregateParameters) + throws SemanticException; + + /** + * Returns aggregation information for analytical function based on given parameters. 
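+ * <p>E.g., {@code getWindowAggregateFunctionInfo(false, false, "rank", params)} is
+ * expected to return an {@link AggregateInfo} with an {@code int} return type, while
+ * non-ranking functions derive the type from the UDAF evaluator.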
+ */ + AggregateInfo getWindowAggregateFunctionInfo(boolean isDistinct, boolean isAllColumns, + String aggregateName, List aggregateParameters) + throws SemanticException; + + /** + * Folds expression according to function semantics. + */ + default RexNode foldExpression(RexNode expr) { + return expr; + } + + /** + * Class to store aggregate function related information. + */ + class AggregateInfo { + private final List parameters; + private final TypeInfo returnType; + private final String aggregateName; + private final boolean distinct; + + public AggregateInfo(List parameters, TypeInfo returnType, String aggregateName, + boolean distinct) { + this.parameters = ImmutableList.copyOf(parameters); + this.returnType = returnType; + this.aggregateName = aggregateName; + this.distinct = distinct; + } + + public List getParameters() { + return parameters; + } + + public TypeInfo getReturnType() { + return returnType; + } + + public String getAggregateName() { + return aggregateName; + } + + public boolean isDistinct() { + return distinct; + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java new file mode 100644 index 0000000000..9cdd6dabe2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java @@ -0,0 +1,491 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse.type; + +import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlBinaryOperator; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Function helper for Hive. + */ +public class HiveFunctionHelper implements FunctionHelper { + + private static final Logger LOG = LoggerFactory.getLogger(HiveFunctionHelper.class); + + private final RexBuilder rexBuilder; + + public HiveFunctionHelper(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + } + + /** + * {@inheritDoc} + */ + @Override + public FunctionInfo getFunctionInfo(String functionText) + throws SemanticException { + return FunctionRegistry.getFunctionInfo(functionText); + } + + /** + * {@inheritDoc} + */ + @Override + public RelDataType getReturnType(FunctionInfo fi, List inputs) + throws SemanticException { + // 1) Gather inputs + ObjectInspector[] inputsOIs = new ObjectInspector[inputs.size()]; + for (int i = 0; i < inputsOIs.length; i++) { + inputsOIs[i] = createObjectInspector(inputs.get(i)); + } + // 2) Initialize and obtain return type + ObjectInspector oi = fi.getGenericUDF() != null ? + fi.getGenericUDF().initializeAndFoldConstants(inputsOIs) : + fi.getGenericUDTF().initialize(inputsOIs); + // 3) Convert to RelDataType + return TypeConverter.convert( + TypeInfoUtils.getTypeInfoFromObjectInspector(oi), rexBuilder.getTypeFactory()); + } + + /** + * {@inheritDoc} + */ + @Override + public List convertInputs(FunctionInfo fi, List inputs, + RelDataType returnType) + throws SemanticException { + // 1) Obtain UDF + final GenericUDF genericUDF = fi.getGenericUDF(); + final TypeInfo typeInfo = TypeConverter.convert(returnType); + TypeInfo targetType = null; + + boolean isNumeric = genericUDF instanceof GenericUDFBaseBinary + && typeInfo.getCategory() == Category.PRIMITIVE + && PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); + boolean isCompare = !isNumeric && genericUDF instanceof GenericUDFBaseCompare; + boolean isBetween = !isNumeric && genericUDF instanceof GenericUDFBetween; + boolean isIN = !isNumeric && genericUDF instanceof GenericUDFIn; + + if (isNumeric) { + targetType = typeInfo; + } else if (genericUDF instanceof GenericUDFBaseCompare) { + targetType = FunctionRegistry.getCommonClassForComparison( + TypeConverter.convert(inputs.get(0).getType()), TypeConverter.convert(inputs.get(1).getType())); + } else if (genericUDF instanceof GenericUDFBetween) { + assert inputs.size() == 4; + // We skip first child as is not involved (is the revert boolean) + // The target type needs to account for all 3 operands + targetType = FunctionRegistry.getCommonClassForComparison( + TypeConverter.convert(inputs.get(1).getType()), + FunctionRegistry.getCommonClassForComparison( + TypeConverter.convert(inputs.get(2).getType()), + TypeConverter.convert(inputs.get(3).getType()))); + } else if (genericUDF instanceof GenericUDFIn) { + // We're only considering the first element of the IN list for the type + assert inputs.size() > 1; + targetType = FunctionRegistry.getCommonClassForComparison( + TypeConverter.convert(inputs.get(0).getType()), + TypeConverter.convert(inputs.get(1).getType())); + } + + boolean isAllPrimitive = true; + if (targetType != null && targetType.getCategory() == Category.PRIMITIVE) { + List newInputs = new ArrayList<>(); + // Convert inputs if needed + for (int i = 0; i < inputs.size(); ++i) { + RexNode input = inputs.get(i); + TypeInfo inputTypeInfo = TypeConverter.convert(input.getType()); + RexNode tmpExprNode = input; + if 
(TypeInfoUtils.isConversionRequiredForComparison(targetType, inputTypeInfo)) { + if (isCompare || isBetween || isIN) { + // For compare, we will convert requisite children + // For BETWEEN skip the first child (the invert boolean) + if (!isBetween || i > 0) { + tmpExprNode = RexNodeTypeCheck.getExprNodeDefaultExprProcessor(rexBuilder) + .createConversionCast(input, (PrimitiveTypeInfo) targetType); + inputTypeInfo = TypeConverter.convert(tmpExprNode.getType()); + } + } else if (isNumeric) { + // For numeric, we'll do minimum necessary cast - if we cast to the type + // of expression, bad things will happen. + PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(inputTypeInfo, targetType); + tmpExprNode = RexNodeTypeCheck.getExprNodeDefaultExprProcessor(rexBuilder) + .createConversionCast(input, minArgType); + inputTypeInfo = TypeConverter.convert(tmpExprNode.getType()); + } else { + throw new AssertionError("Unexpected " + targetType + " - not a numeric op or compare"); + } + } + + isAllPrimitive = isAllPrimitive && inputTypeInfo.getCategory() == Category.PRIMITIVE; + newInputs.add(tmpExprNode); + } + return newInputs; + } + return inputs; + } + + /** + * {@inheritDoc} + */ + @Override + public RexNode getExpression(String functionText, FunctionInfo fi, + List<RexNode> inputs, RelDataType returnType) + throws SemanticException { + // See if this is an explicit cast. + RexNode expr = RexNodeConverter.handleExplicitCast( + fi.getGenericUDF(), returnType, inputs, rexBuilder); + + if (expr == null) { + // This is not a cast; process the function. + ImmutableList.Builder<RelDataType> argsTypes = ImmutableList.builder(); + boolean isAllPrimitive = true; + for (RexNode input : inputs) { + argsTypes.add(input.getType()); + isAllPrimitive &= !input.getType().isStruct(); + } + SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(functionText, + fi.getGenericUDF(), argsTypes.build(), returnType); + if (calciteOp.getKind() == SqlKind.CASE) { + // If it is a case operator, we need to rewrite it + inputs = RexNodeConverter.rewriteCaseChildren(functionText, inputs, rexBuilder); + // Adjust branch types by inserting explicit casts if the actual type is ambiguous + inputs = RexNodeConverter.adjustCaseBranchTypes(inputs, returnType, rexBuilder); + } else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) { + // If it is an extract operator, we need to rewrite it + inputs = RexNodeConverter.rewriteExtractDateChildren(calciteOp, inputs, rexBuilder); + } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) { + // If it is a floor operator, we need to rewrite it + inputs = RexNodeConverter.rewriteFloorDateChildren(calciteOp, inputs, rexBuilder); + } else if (calciteOp.getKind() == SqlKind.IN && isAllPrimitive) { + if (inputs.size() == 2) { + // if it is a single item in an IN clause, transform A IN (B) to A = B + // from IN [A,B] => EQUALS [A,B] + // except complex types + calciteOp = SqlStdOperatorTable.EQUALS; + } else if (RexUtil.isReferenceOrAccess(inputs.get(0), true)){ + // if there is more than a single item in an IN clause, + // transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]] + // except complex types + // The rewrite to OR is done only if the number of operands is less than + // the configured threshold + boolean rewriteToOr = true; + int maxNodesForInToOrTransformation; + try { + maxNodesForInToOrTransformation = HiveConf.getIntVar( + Hive.get().getConf(), HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES); + } catch (HiveException e) { + throw new SemanticException(e); + }
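+ // A threshold of 0 disables the limit, so the rewrite to OR is applied regardless of the number of operands.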
+ if(maxNodesForInToOrTransformation != 0) { + if(inputs.size() > maxNodesForInToOrTransformation) { + rewriteToOr = false; + } + } + if(rewriteToOr) { + inputs = RexNodeConverter.rewriteInClauseChildren(calciteOp, inputs, rexBuilder); + calciteOp = SqlStdOperatorTable.OR; + } + } + } else if (calciteOp.getKind() == SqlKind.COALESCE && + inputs.size() > 1) { + // Rewrite COALESCE as a CASE + // This allows it to be further reduced to OR, if possible + calciteOp = SqlStdOperatorTable.CASE; + inputs = RexNodeConverter.rewriteCoalesceChildren(inputs, rexBuilder); + // Adjust branch types by inserting explicit casts if the actual type is ambiguous + inputs = RexNodeConverter.adjustCaseBranchTypes(inputs, returnType, rexBuilder); + } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { + inputs = RexNodeConverter.rewriteToDateChildren(inputs, rexBuilder); + } else if (calciteOp.getKind() == SqlKind.BETWEEN) { + assert inputs.get(0).isAlwaysTrue() || inputs.get(0).isAlwaysFalse(); + boolean invert = inputs.get(0).isAlwaysTrue(); + SqlBinaryOperator cmpOp; + if (invert) { + calciteOp = SqlStdOperatorTable.OR; + cmpOp = SqlStdOperatorTable.GREATER_THAN; + } else { + calciteOp = SqlStdOperatorTable.AND; + cmpOp = SqlStdOperatorTable.LESS_THAN_OR_EQUAL; + } + RexNode op = inputs.get(1); + RexNode rangeL = inputs.get(2); + RexNode rangeH = inputs.get(3); + inputs = new ArrayList<>(); + inputs.add(rexBuilder.makeCall(cmpOp, rangeL, op)); + inputs.add(rexBuilder.makeCall(cmpOp, op, rangeH)); + } + expr = rexBuilder.makeCall(returnType, calciteOp, inputs); + } + + if (expr instanceof RexCall && !expr.isA(SqlKind.CAST)) { + RexCall call = (RexCall) expr; + expr = rexBuilder.makeCall(returnType, call.getOperator(), + RexUtil.flatten(call.getOperands(), call.getOperator())); + } + + return expr; + } + + /** + * {@inheritDoc} + */ + @Override + public AggregateInfo getAggregateFunctionInfo(boolean isDistinct, boolean isAllColumns, + String aggregateName, List<RexNode> aggregateParameters) + throws SemanticException { + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode( + GroupByDesc.Mode.COMPLETE, isDistinct); + List<ObjectInspector> aggParameterOIs = new ArrayList<>(); + for (RexNode aggParameter : aggregateParameters) { + aggParameterOIs.add(createObjectInspector(aggParameter)); + } + GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator2( + aggregateName, aggParameterOIs, null, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo2( + genericUDAFEvaluator, udafMode, aggParameterOIs); + return new AggregateInfo(aggregateParameters, udaf.returnType, aggregateName, isDistinct); + } + + /** + * {@inheritDoc} + */ + @Override + public AggregateInfo getWindowAggregateFunctionInfo(boolean isDistinct, boolean isAllColumns, + String aggregateName, List<RexNode> aggregateParameters) + throws SemanticException { + TypeInfo returnType = null; + + if (FunctionRegistry.isRankingFunction(aggregateName)) { + // Ranking functions return 'int'/'double' + if (aggregateName.equalsIgnoreCase("percent_rank")) { + returnType = TypeInfoFactory.doubleTypeInfo; + } else { + returnType = TypeInfoFactory.intTypeInfo; + } + } else { + // Try obtaining UDAF evaluators to determine the return type + try { + Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode( + GroupByDesc.Mode.COMPLETE, isDistinct); + List<ObjectInspector> aggParameterOIs = new ArrayList<>(); + for (RexNode aggParameter : aggregateParameters) { + aggParameterOIs.add(createObjectInspector(aggParameter)); + } +
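+ // LEAD/LAG evaluators report an array return type; the element type of that array is the actual return type.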
if (aggregateName.toLowerCase().equals(FunctionRegistry.LEAD_FUNC_NAME) + || aggregateName.toLowerCase().equals(FunctionRegistry.LAG_FUNC_NAME)) { + GenericUDAFEvaluator genericUDAFEvaluator = FunctionRegistry.getGenericWindowingEvaluator(aggregateName, + aggParameterOIs, isDistinct, isAllColumns); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo2( + genericUDAFEvaluator, udafMode, aggParameterOIs); + returnType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo(); + } else { + GenericUDAFEvaluator genericUDAFEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator2( + aggregateName, aggParameterOIs, null, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo2( + genericUDAFEvaluator, udafMode, aggParameterOIs); + if (FunctionRegistry.pivotResult(aggregateName)) { + returnType = ((ListTypeInfo) udaf.returnType).getListElementTypeInfo(); + } else { + returnType = udaf.returnType; + } + } + } catch (Exception e) { + LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggregateName + + ", trying to translate to GenericUDF"); + } + } + + return returnType != null ? + new AggregateInfo(aggregateParameters, returnType, aggregateName, isDistinct) : null; + } + + private ObjectInspector createObjectInspector(RexNode expr) { + ObjectInspector oi = createConstantObjectInspector(expr); + if (oi != null) { + return oi; + } + return TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + TypeConverter.convert(expr.getType())); + } + + /** + * Returns constant object inspector or null if it could not be generated. + */ + private ConstantObjectInspector createConstantObjectInspector(RexNode expr) { + if (RexUtil.isLiteral(expr, true)) { // Literal or cast on literal + final ExprNodeConstantDesc constant; + if (expr.isA(SqlKind.LITERAL)) { + constant = ExprNodeConverter.toExprNodeConstantDesc((RexLiteral) expr); + } else { + RexNode foldedExpr = foldExpression(expr); + if (!foldedExpr.isA(SqlKind.LITERAL)) { + // Constant could not be generated + return null; + } + constant = ExprNodeConverter.toExprNodeConstantDesc((RexLiteral) foldedExpr); + } + PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) constant.getTypeInfo(); + Object value = constant.getValue(); + Object writableValue = value == null ? 
null : + PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(typeInfo) + .getPrimitiveWritableObject(value); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + typeInfo, writableValue); + } else if (expr instanceof RexCall) { + RexCall call = (RexCall) expr; + if (call.getOperator() == SqlStdOperatorTable.ROW) { // Struct + List fieldNames = new ArrayList<>(); + List fieldObjectInspectors = new ArrayList<>(); + List writableValues = new ArrayList<>(); + for (int i = 0; i < call.getOperands().size(); i++) { + RexNode input = call.getOperands().get(i); + ConstantObjectInspector objectInspector = createConstantObjectInspector(input); + if (objectInspector == null) { + // Constant could not be generated + return null; + } + fieldNames.add(expr.getType().getFieldList().get(i).getName()); + fieldObjectInspectors.add(objectInspector); + writableValues.add(objectInspector.getWritableConstantValue()); + } + return ObjectInspectorFactory.getStandardConstantStructObjectInspector( + fieldNames, + fieldObjectInspectors, + writableValues); + } else if (call.getOperator() == SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR) { // List + ListTypeInfo listTypeInfo = (ListTypeInfo) TypeConverter.convert(expr.getType()); + TypeInfo typeInfo = listTypeInfo.getListElementTypeInfo(); + List writableValues = new ArrayList<>(); + for (RexNode input : call.getOperands()) { + ConstantObjectInspector objectInspector = createConstantObjectInspector(input); + if (objectInspector == null) { + // Constant could not be generated + return null; + } + writableValues.add(objectInspector.getWritableConstantValue()); + } + return ObjectInspectorFactory.getStandardConstantListObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector( + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo), + ObjectInspectorCopyOption.WRITABLE), + writableValues); + } else if (call.getOperator() == SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR) { // Map + MapTypeInfo mapTypeInfo = (MapTypeInfo) TypeConverter.convert(expr.getType()); + Map writableValues = new HashMap<>(); + Iterator it = call.getOperands().iterator(); + while (it.hasNext()) { + ConstantObjectInspector keyObjectInspector = createConstantObjectInspector(it.next()); + if (keyObjectInspector == null) { + // Constant could not be generated + return null; + } + ConstantObjectInspector valueObjectInspector = createConstantObjectInspector(it.next()); + if (valueObjectInspector == null) { + // Constant could not be generated + return null; + } + writableValues.put( + keyObjectInspector.getWritableConstantValue(), + valueObjectInspector.getWritableConstantValue()); + } + return ObjectInspectorFactory.getStandardConstantMapObjectInspector( + ObjectInspectorUtils.getStandardObjectInspector( + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo()), + ObjectInspectorCopyOption.WRITABLE), + ObjectInspectorUtils.getStandardObjectInspector( + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo()), + ObjectInspectorCopyOption.WRITABLE), + writableValues); + } + } + // Constant could not be generated + return null; + } + + /** + * {@inheritDoc} + */ + @Override + public RexNode foldExpression(RexNode expr) { + HiveRexExecutorImpl executor = new HiveRexExecutorImpl(); + List result = new ArrayList<>(); + executor.reduce(rexBuilder, ImmutableList.of(expr), result); + return result.get(0); + } + +} diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/JoinCondTypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/JoinCondTypeCheckProcFactory.java index 319bae6dbe..309a34ef88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/JoinCondTypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/JoinCondTypeCheckProcFactory.java @@ -86,7 +86,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (!qualifiedAccess) { colInfo = getColInfo(ctx, null, tableOrCol, expr); // It's a column. - return exprFactory.createColumnRefExpr(colInfo); + return exprFactory.createColumnRefExpr(colInfo, ctx.getInputRRList()); } else if (hasTableAlias(ctx, tableOrCol, expr)) { return null; } else { @@ -160,12 +160,13 @@ protected ColumnExprProcessor getColumnExprProcessor() { @Override protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, Object... nodeOutputs) throws SemanticException { + JoinTypeCheckCtx jctx = (JoinTypeCheckCtx) ctx; String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) .getText()); // NOTE: tableAlias must be a valid non-ambiguous table alias, // because we've checked that in TOK_TABLE_OR_COL's process method. ColumnInfo colInfo = getColInfo((JoinTypeCheckCtx) ctx, tableAlias, - exprFactory.getConstantValue((T) nodeOutputs[1]).toString(), expr); + exprFactory.getConstantValueAsString((T) nodeOutputs[1]), expr); if (colInfo == null) { ctx.setError(ASTErrorUtils.getMsg( @@ -174,7 +175,7 @@ protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, } ColumnInfo newColumnInfo = new ColumnInfo(colInfo); newColumnInfo.setTabAlias(tableAlias); - return exprFactory.createColumnRefExpr(newColumnInfo); + return exprFactory.createColumnRefExpr(newColumnInfo, jctx.getInputRRList()); } private ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java new file mode 100644 index 0000000000..bea146be1f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java @@ -0,0 +1,1063 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse.type; + +import com.google.common.collect.ImmutableList; +import java.math.BigDecimal; +import java.time.Instant; +import java.time.ZoneId; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.apache.calcite.avatica.util.TimeUnit; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlCollation; +import org.apache.calcite.sql.SqlIntervalQualifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlQuantifyOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.ConversionUtil; +import org.apache.calcite.util.DateString; +import org.apache.calcite.util.NlsString; +import org.apache.calcite.util.TimestampString; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.common.type.TimestampTZ; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRexExprList; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory.HiveNlsString.Interpretation; +import org.apache.hadoop.hive.ql.plan.SubqueryType; +import org.apache.hadoop.hive.ql.udf.SettableUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; 
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Expression factory for Calcite {@link RexNode}. + */ +public class RexNodeExprFactory extends ExprFactory { + + private static final Logger LOG = LoggerFactory.getLogger(RexNodeExprFactory.class); + + private final RexBuilder rexBuilder; + private final FunctionHelper functionHelper; + + public RexNodeExprFactory(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + this.functionHelper = new HiveFunctionHelper(rexBuilder); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isExprInstance(Object o) { + return o instanceof RexNode; + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode toExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws CalciteSemanticException { + ObjectInspector inspector = colInfo.getObjectInspector(); + if (inspector instanceof ConstantObjectInspector && inspector instanceof PrimitiveObjectInspector) { + return toPrimitiveConstDesc(colInfo, inspector, rexBuilder); + } + int index = rowResolver.getPosition(colInfo.getInternalName()); + if (index < 0) { + throw new CalciteSemanticException("Unexpected error: Cannot find column"); + } + return rexBuilder.makeInputRef( + TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), index + offset); + } + + private static RexNode toPrimitiveConstDesc( + ColumnInfo colInfo, ObjectInspector inspector, RexBuilder rexBuilder) + throws CalciteSemanticException { + Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue(); + return rexBuilder.makeLiteral(constant, + TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createColumnRefExpr(ColumnInfo colInfo, RowResolver rowResolver, int offset) + throws CalciteSemanticException { + int index = rowResolver.getPosition(colInfo.getInternalName()); + return rexBuilder.makeInputRef( + TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), index + offset); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createColumnRefExpr(ColumnInfo colInfo, List rowResolverList) + throws SemanticException { + int index = getPosition(colInfo, rowResolverList); + return rexBuilder.makeInputRef( + TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), index); + } + + private int getPosition(ColumnInfo colInfo, List rowResolverList) + throws SemanticException { + ColumnInfo tmp; + ColumnInfo cInfoToRet = null; + int position = 0; + for (RowResolver rr : rowResolverList) { + tmp = rr.get(colInfo.getTabAlias(), colInfo.getAlias()); + if (tmp != null) { + if (cInfoToRet != null) { + throw new CalciteSemanticException("Could not resolve column name"); + } + cInfoToRet = tmp; + position += rr.getPosition(cInfoToRet.getInternalName()); + } else if (cInfoToRet == null) { + position += rr.getColumnInfos().size(); + } + } + return position; + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode 
createNullConstantExpr() { + return rexBuilder.makeNullLiteral( + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.NULL)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createBooleanConstantExpr(String value) { + Boolean b = value != null ? Boolean.valueOf(value) : null; + return rexBuilder.makeLiteral(b, + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BOOLEAN), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createBigintConstantExpr(String value) { + return rexBuilder.makeLiteral( + new BigDecimal(Long.valueOf(value)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createIntConstantExpr(String value) { + return rexBuilder.makeLiteral( + new BigDecimal(Integer.valueOf(value)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.INTEGER), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createSmallintConstantExpr(String value) { + return rexBuilder.makeLiteral( + new BigDecimal(Short.valueOf(value)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.SMALLINT), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createTinyintConstantExpr(String value) { + return rexBuilder.makeLiteral( + new BigDecimal(Byte.valueOf(value)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.TINYINT), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createFloatConstantExpr(String value) { + Float f = Float.valueOf(value); + return rexBuilder.makeApproxLiteral( + new BigDecimal(Float.toString(f)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.FLOAT)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createDoubleConstantExpr(String value) throws SemanticException { + Double d = Double.valueOf(value); + // TODO: The best solution is to support NaN in expression reduction. + if (Double.isNaN(d)) { + throw new CalciteSemanticException("NaN", UnsupportedFeature.Invalid_decimal); + } + return rexBuilder.makeApproxLiteral( + new BigDecimal(Double.toString(d)), + rexBuilder.getTypeFactory().createSqlType(SqlTypeName.DOUBLE)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createDecimalConstantExpr(String value, boolean allowNullValueConstantExpr) { + HiveDecimal hd = HiveDecimal.create(value); + if (!allowNullValueConstantExpr && hd == null) { + return null; + } + DecimalTypeInfo type = adjustType(hd); + return rexBuilder.makeExactLiteral( + hd != null ? hd.bigDecimalValue() : null, + TypeConverter.convert(type, rexBuilder.getTypeFactory())); + } + + @Override + protected TypeInfo adjustConstantType(PrimitiveTypeInfo targetType, Object constantValue) { + if (constantValue instanceof HiveDecimal) { + return adjustType((HiveDecimal) constantValue); + } + return targetType; + } + + private DecimalTypeInfo adjustType(HiveDecimal hd) { + // Note: the normalize() call with rounding in HiveDecimal will currently reduce the + // precision and scale of the value by throwing away trailing zeroes. This may or may + // not be desirable for the literals; however, this used to be the default behavior + // for explicit decimal literals (e.g. 1.0BD), so we keep this behavior for now. 
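+ // e.g. a literal such as 1.0BD ends up typed as decimal(1,0) once the trailing zero is dropped.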
+ int prec = 1; + int scale = 0; + if (hd != null) { + prec = hd.precision(); + scale = hd.scale(); + } + DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(prec, scale); + return typeInfo; + } + + /** + * {@inheritDoc} + */ + @Override + protected Object interpretConstantAsPrimitive(PrimitiveTypeInfo targetType, Object constantValue, + PrimitiveTypeInfo sourceType) { + // Extract string value if necessary + Object constantToInterpret = constantValue; + if (constantValue instanceof HiveNlsString) { + constantToInterpret = ((HiveNlsString) constantValue).getValue(); + } + + if (constantToInterpret instanceof Number || constantToInterpret instanceof String) { + try { + PrimitiveTypeEntry primitiveTypeEntry = targetType.getPrimitiveTypeEntry(); + if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(primitiveTypeEntry)) { + return toBigDecimal(constantToInterpret.toString()).intValueExact(); + } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(primitiveTypeEntry)) { + return toBigDecimal(constantToInterpret.toString()).longValueExact(); + } else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) { + return Double.valueOf(constantToInterpret.toString()); + } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) { + return Float.valueOf(constantToInterpret.toString()); + } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(primitiveTypeEntry)) { + return toBigDecimal(constantToInterpret.toString()).byteValueExact(); + } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(primitiveTypeEntry)) { + return toBigDecimal(constantToInterpret.toString()).shortValueExact(); + } else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(primitiveTypeEntry)) { + HiveDecimal decimal = HiveDecimal.create(constantToInterpret.toString()); + return decimal != null ? decimal.bigDecimalValue() : null; + } + } catch (NumberFormatException | ArithmeticException nfe) { + LOG.trace("Failed to narrow type of constant", nfe); + return null; + } + } + + // Comparison of decimal and float/double happens in float/double. + if (constantToInterpret instanceof BigDecimal) { + BigDecimal bigDecimal = (BigDecimal) constantToInterpret; + + PrimitiveTypeEntry primitiveTypeEntry = targetType.getPrimitiveTypeEntry(); + if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) { + return bigDecimal.doubleValue(); + } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) { + return bigDecimal.floatValue(); + } + return bigDecimal; + } + + String constTypeInfoName = sourceType.getTypeName(); + if (constTypeInfoName.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + // Because a comparison against a "string" will happen in "string" type, + // constants which represent char/varchar values must be converted to the + // appropriate type to avoid unintentional comparisons in "string".
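+ // If the constant does not fit in the declared char/varchar length, null is returned and no constant conversion takes place.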
+ if (targetType instanceof CharTypeInfo) { + final String constValue = constantToInterpret.toString(); + final int length = TypeInfoUtils.getCharacterLengthForType(targetType); + HiveChar newValue = new HiveChar(constValue, length); + HiveChar maxCharConst = new HiveChar(constValue, HiveChar.MAX_CHAR_LENGTH); + if (maxCharConst.equals(newValue)) { + return makeHiveUnicodeString(Interpretation.CHAR, newValue.getValue()); + } else { + return null; + } + } + if (targetType instanceof VarcharTypeInfo) { + final String constValue = constantToInterpret.toString(); + final int length = TypeInfoUtils.getCharacterLengthForType(targetType); + HiveVarchar newValue = new HiveVarchar(constValue, length); + HiveVarchar maxCharConst = new HiveVarchar(constValue, HiveVarchar.MAX_VARCHAR_LENGTH); + if (maxCharConst.equals(newValue)) { + return makeHiveUnicodeString(Interpretation.VARCHAR, newValue.getValue()); + } else { + return null; + } + } + } + + return constantValue; + } + + private BigDecimal toBigDecimal(String val) { + if (!NumberUtils.isNumber(val)) { + throw new NumberFormatException("The given string is not a valid number: " + val); + } + return new BigDecimal(val.replaceAll("[dDfFlL]$", "")); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createStringConstantExpr(String value) { + return rexBuilder.makeCharLiteral( + makeHiveUnicodeString(Interpretation.STRING, value)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createDateConstantExpr(String value) { + Date d = Date.valueOf(value); + return rexBuilder.makeDateLiteral( + DateString.fromDaysSinceEpoch(d.toEpochDay())); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createTimestampConstantExpr(String value) { + Timestamp t = Timestamp.valueOf(value); + return (RexLiteral) rexBuilder.makeLiteral( + TimestampString.fromMillisSinceEpoch(t.toEpochMilli()).withNanos(t.getNanos()), + rexBuilder.getTypeFactory().createSqlType( + SqlTypeName.TIMESTAMP, + rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP)), + false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createTimestampLocalTimeZoneConstantExpr(String value, ZoneId zoneId) { + TimestampTZ t = TimestampTZUtil.parse(value); + + final TimestampString tsLocalTZString; + if (value == null) { + tsLocalTZString = null; + } else { + Instant i = t.getZonedDateTime().toInstant(); + tsLocalTZString = TimestampString + .fromMillisSinceEpoch(i.toEpochMilli()) + .withNanos(i.getNano()); + } + return rexBuilder.makeTimestampWithLocalTimeZoneLiteral( + tsLocalTZString, + rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE)); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalYearMonthConstantExpr(String value) { + BigDecimal totalMonths = BigDecimal.valueOf(HiveIntervalYearMonth.valueOf(value).getTotalMonths()); + return rexBuilder.makeIntervalLiteral(totalMonths, + new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalDayTimeConstantExpr(String value) { + HiveIntervalDayTime v = HiveIntervalDayTime.valueOf(value); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new 
+ SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalYearConstantExpr(String value) { + HiveIntervalYearMonth v = new HiveIntervalYearMonth(Integer.parseInt(value), 0); + BigDecimal totalMonths = BigDecimal.valueOf(v.getTotalMonths()); + return rexBuilder.makeIntervalLiteral(totalMonths, + new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalMonthConstantExpr(String value) { + BigDecimal totalMonths = BigDecimal.valueOf(Integer.parseInt(value)); + return rexBuilder.makeIntervalLiteral(totalMonths, + new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalDayConstantExpr(String value) { + HiveIntervalDayTime v = new HiveIntervalDayTime(Integer.parseInt(value), 0, 0, 0, 0); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalHourConstantExpr(String value) { + HiveIntervalDayTime v = new HiveIntervalDayTime(0, Integer.parseInt(value), 0, 0, 0); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalMinuteConstantExpr(String value) { + HiveIntervalDayTime v = new HiveIntervalDayTime(0, 0, Integer.parseInt(value), 0, 0); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexLiteral createIntervalSecondConstantExpr(String value) { + BigDecimal bd = new BigDecimal(value); + BigDecimal bdSeconds = new BigDecimal(bd.toBigInteger()); + BigDecimal bdNanos = bd.subtract(bdSeconds); + HiveIntervalDayTime v = new HiveIntervalDayTime(0, 0, 0, bdSeconds.intValueExact(), + bdNanos.multiply(NANOS_PER_SEC_BD).intValue()); + BigDecimal secsValueBd = BigDecimal + .valueOf(v.getTotalSeconds() * 1000); + BigDecimal nanosValueBd = BigDecimal.valueOf((v).getNanos(), 6); + return rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createStructExpr(TypeInfo typeInfo, List operands) + throws CalciteSemanticException { + assert typeInfo instanceof StructTypeInfo; + return rexBuilder.makeCall( + TypeConverter.convert(typeInfo, rexBuilder.getTypeFactory()), + SqlStdOperatorTable.ROW, + operands); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createConstantExpr(TypeInfo typeInfo, Object constantValue) + throws CalciteSemanticException { + if (typeInfo instanceof StructTypeInfo) { + List typeList = ((StructTypeInfo) 
typeInfo).getAllStructFieldTypeInfos(); + List objectList = (List) constantValue; + List operands = new ArrayList<>(); + for (int i = 0; i < typeList.size(); i++) { + operands.add( + rexBuilder.makeLiteral( + objectList.get(i), + TypeConverter.convert(typeList.get(i), rexBuilder.getTypeFactory()), + false)); + } + return rexBuilder.makeCall( + TypeConverter.convert(typeInfo, rexBuilder.getTypeFactory()), + SqlStdOperatorTable.ROW, + operands); + } + return rexBuilder.makeLiteral(constantValue, + TypeConverter.convert(typeInfo, rexBuilder.getTypeFactory()), false); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createNestedColumnRefExpr( + TypeInfo typeInfo, RexNode expr, String fieldName, Boolean isList) throws CalciteSemanticException { + if (expr.getType().isStruct()) { + // regular case of accessing nested field in a column + return rexBuilder.makeFieldAccess(expr, fieldName, true); + } else { + // This may happen for schema-less tables, where columns are dynamically + // supplied by serdes. + throw new CalciteSemanticException("Unexpected rexnode : " + + expr.getClass().getCanonicalName(), UnsupportedFeature.Schema_less_table); + } + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createFuncCallExpr(TypeInfo returnType, GenericUDF genericUDF, + List inputs) throws SemanticException { + final String funcText = genericUDF.getClass().getAnnotation(Description.class).name(); + final FunctionInfo functionInfo = functionHelper.getFunctionInfo(funcText); + return functionHelper.getExpression( + funcText, functionInfo, inputs, + TypeConverter.convert(returnType, rexBuilder.getTypeFactory())); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createFuncCallExpr(GenericUDF genericUDF, String funcText, + List inputs) throws SemanticException { + // 1) Function resolution + final FunctionInfo functionInfo = functionHelper.getFunctionInfo(funcText); + // 2) Compute return type + RelDataType returnType; + if (genericUDF instanceof SettableUDF) { + returnType = TypeConverter.convert( + ((SettableUDF) genericUDF).getTypeInfo(), rexBuilder.getTypeFactory()); + } else { + returnType = functionHelper.getReturnType(functionInfo, inputs); + } + // 3) Convert inputs (if necessary) + List newInputs = functionHelper.convertInputs( + functionInfo, inputs, returnType); + // 4) Return Calcite function + return functionHelper.getExpression( + funcText, functionInfo, newInputs, returnType); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createExprsListExpr() { + return new HiveRexExprList(); + } + + /** + * {@inheritDoc} + */ + @Override + protected void addExprToExprsList(RexNode columnList, RexNode expr) { + HiveRexExprList l = (HiveRexExprList) columnList; + l.addExpression(expr); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isConstantExpr(Object o) { + return o instanceof RexLiteral; + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isFuncCallExpr(Object o) { + return o instanceof RexCall; + } + + /** + * {@inheritDoc} + */ + @Override + protected Object getConstantValue(RexNode expr) { + if (expr.getType().getSqlTypeName() == SqlTypeName.ROW) { + List res = new ArrayList<>(); + for (RexNode node : ((RexCall) expr).getOperands()) { + res.add(((RexLiteral) node).getValue4()); + } + return res; + } + return ((RexLiteral) expr).getValue4(); + } + + /** + * {@inheritDoc} + */ + @Override + protected String getConstantValueAsString(RexNode expr) { + return ((RexLiteral) expr).getValueAs(String.class); + 
} + + /** + * {@inheritDoc} + */ + @Override + protected boolean isColumnRefExpr(Object o) { + return o instanceof RexNode && RexUtil.isReferenceOrAccess((RexNode) o, true); + } + + /** + * {@inheritDoc} + */ + @Override + protected String getColumnName(RexNode expr, RowResolver rowResolver) { + int index = ((RexInputRef) expr).getIndex(); + return rowResolver.getColumnInfos().get(index).getInternalName(); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isExprsListExpr(Object o) { + return o instanceof HiveRexExprList; + } + + /** + * {@inheritDoc} + */ + @Override + protected List getExprChildren(RexNode expr) { + if (expr instanceof RexCall) { + return ((RexCall) expr).getOperands(); + } else if (expr instanceof HiveRexExprList) { + return ((HiveRexExprList) expr).getExpressions(); + } + return new ArrayList<>(); + } + + /** + * {@inheritDoc} + */ + @Override + protected TypeInfo getTypeInfo(RexNode expr) { + return expr.isA(SqlKind.LITERAL) ? + TypeConverter.convertLiteralType((RexLiteral) expr) : + TypeConverter.convert(expr.getType()); + } + + /** + * {@inheritDoc} + */ + @Override + protected List getStructTypeInfoList(RexNode expr) { + StructTypeInfo structTypeInfo = (StructTypeInfo) TypeConverter.convert(expr.getType()); + return structTypeInfo.getAllStructFieldTypeInfos(); + } + + /** + * {@inheritDoc} + */ + @Override + protected List getStructNameList(RexNode expr) { + StructTypeInfo structTypeInfo = (StructTypeInfo) TypeConverter.convert(expr.getType()); + return structTypeInfo.getAllStructFieldNames(); + } + + /** + * {@inheritDoc} + */ + @Override + protected List rewriteINIntoORFuncCallExpr(List inOperands) throws SemanticException { + return transformIntoOrAndClause(inOperands, rexBuilder); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isORFuncCallExpr(RexNode expr) { + return expr.isA(SqlKind.OR); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isANDFuncCallExpr(RexNode expr) { + return expr.isA(SqlKind.AND); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isPOSITIVEFuncCallExpr(RexNode expr) { + return expr.isA(SqlKind.PLUS_PREFIX); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isNEGATIVEFuncCallExpr(RexNode expr) { + return expr.isA(SqlKind.MINUS_PREFIX); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode setTypeInfo(RexNode expr, TypeInfo type) throws CalciteSemanticException { + RelDataType t = TypeConverter.convert(type, rexBuilder.getTypeFactory()); + if (expr instanceof RexCall) { + RexCall call = (RexCall) expr; + return rexBuilder.makeCall(t, + call.getOperator(), call.getOperands()); + } else if (expr instanceof RexInputRef) { + RexInputRef inputRef = (RexInputRef) expr; + return rexBuilder.makeInputRef(t, inputRef.getIndex()); + } else if (expr instanceof RexLiteral) { + RexLiteral literal = (RexLiteral) expr; + return rexBuilder.makeLiteral(RexLiteral.value(literal), t, false); + } + throw new RuntimeException("Unsupported expression type: " + expr.getClass().getCanonicalName()); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean canConvertCASEIntoCOALESCEFuncCallExpr(GenericUDF genericUDF, List inputs) { + if (genericUDF instanceof GenericUDFWhen && inputs.size() == 3 && + inputs.get(1) instanceof RexLiteral && + inputs.get(2) instanceof RexLiteral) { + RexLiteral constThen = (RexLiteral) inputs.get(1); + RexLiteral constElse = (RexLiteral) inputs.get(2); + Object thenVal = constThen.getValue(); + Object elseVal = 
constElse.getValue(); + if (thenVal instanceof Boolean && elseVal instanceof Boolean) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode foldExpr(RexNode expr) { + return functionHelper.foldExpression(expr); + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isSTRUCTFuncCallExpr(RexNode expr) { + return expr instanceof RexCall && + ((RexCall) expr).getOperator() == SqlStdOperatorTable.ROW; + } + + /** + * {@inheritDoc} + */ + @Override + protected boolean isConstantStruct(RexNode expr) { + return expr.getType().getSqlTypeName() == SqlTypeName.ROW && + HiveCalciteUtil.isConstant(expr); + } + + /** + * {@inheritDoc} + */ + @Override + protected RexNode createSubqueryExpr(TypeCheckCtx ctx, ASTNode expr, SubqueryType subqueryType, + Object[] inputs) throws SemanticException { + // subqueryToRelNode might be null if subquery expression anywhere other than + // as expected in filter (where/having). We should throw an appropriate error + // message + Map subqueryToRelNode = ctx.getSubqueryToRelNode(); + if (subqueryToRelNode == null) { + throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + " Currently SubQuery expressions are only allowed as " + + "Where and Having Clause predicates")); + } + + ASTNode subqueryOp = (ASTNode) expr.getChild(0); + RelNode subqueryRel = subqueryToRelNode.get(expr); + // For now because subquery is only supported in filter + // we will create subquery expression of boolean type + switch (subqueryType) { + case EXISTS: { + return RexSubQuery.exists(subqueryRel); + } + case IN: { + assert (inputs[2] != null); + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. 
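+ * e.g. "x IN (SELECT a, b FROM t)" is rejected with this error.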
+ */ + if(subqueryRel.getRowType().getFieldCount() > 1) { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "SubQuery can contain only 1 item in Select List.")); + } + //create RexNode for LHS + RexNode lhs = (RexNode) inputs[2]; + //create RexSubQuery node + return RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); + } + case SCALAR: { + // only single subquery expr is supported + if (subqueryRel.getRowType().getFieldCount() != 1) { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "More than one column expression in subquery")); + } + if(subqueryRel.getRowType().getFieldCount() > 1) { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "SubQuery can contain only 1 item in Select List.")); + } + //create RexSubQuery node + return RexSubQuery.scalar(subqueryRel); + } + case SOME: + case ALL: { + assert (inputs[2] != null); + //create RexNode for LHS + RexNode lhs = (RexNode) inputs[2]; + return convertSubquerySomeAll(subqueryRel.getCluster(), + (ASTNode) subqueryOp.getChild(1), subqueryType, subqueryRel, lhs); + } + default: + return null; + } + } + + private static void throwInvalidSubqueryError(final ASTNode comparisonOp) throws SemanticException { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Invalid operator:" + comparisonOp.toString())); + } + + public static RexNode convertSubquerySomeAll(final RelOptCluster cluster, final ASTNode comparisonOp, + final SubqueryType subqueryType, final RelNode subqueryRel, final RexNode rexNodeLhs) + throws SemanticException { + SqlQuantifyOperator quantifyOperator = null; + switch (comparisonOp.getType()) { + case HiveParser.EQUAL: + if(subqueryType == SubqueryType.ALL) { + throwInvalidSubqueryError(comparisonOp); + } + quantifyOperator = SqlStdOperatorTable.SOME_EQ; + break; + case HiveParser.LESSTHAN: + quantifyOperator = SqlStdOperatorTable.SOME_LT; + break; + case HiveParser.LESSTHANOREQUALTO: + quantifyOperator = SqlStdOperatorTable.SOME_LE; + break; + case HiveParser.GREATERTHAN: + quantifyOperator = SqlStdOperatorTable.SOME_GT; + break; + case HiveParser.GREATERTHANOREQUALTO: + quantifyOperator = SqlStdOperatorTable.SOME_GE; + break; + case HiveParser.NOTEQUAL: + if(subqueryType == SubqueryType.SOME) { + throwInvalidSubqueryError(comparisonOp); + } + quantifyOperator = SqlStdOperatorTable.SOME_NE; + break; + default: + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Invalid operator:" + comparisonOp.toString())); + } + if(subqueryType == SubqueryType.ALL) { + quantifyOperator = SqlStdOperatorTable.some(quantifyOperator.comparisonKind.negateNullSafe()); + } + + RexNode someQuery = getSomeSubquery(cluster, subqueryRel, rexNodeLhs, quantifyOperator); + if(subqueryType == SubqueryType.ALL) { + return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, someQuery); + } + return someQuery; + } + + private static RexNode getSomeSubquery(final RelOptCluster cluster, + final RelNode subqueryRel, final RexNode lhs, + final SqlQuantifyOperator quantifyOperator) { + if(quantifyOperator == SqlStdOperatorTable.SOME_EQ) { + return RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); + } else if (quantifyOperator == SqlStdOperatorTable.SOME_NE) { + RexSubQuery subQuery = RexSubQuery.in(subqueryRel, ImmutableList.of(lhs)); + return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, subQuery); + } else { + return 
RexSubQuery.some(subqueryRel, ImmutableList.of(lhs), quantifyOperator); + } + } + + public static NlsString makeHiveUnicodeString(Interpretation interpretation, String text) { + return new HiveNlsString(interpretation, text, ConversionUtil.NATIVE_UTF16_CHARSET_NAME, SqlCollation.IMPLICIT); + } + + public static class HiveNlsString extends NlsString { + + public enum Interpretation { + CHAR, VARCHAR, STRING; + } + + public final Interpretation interpretation; + + public HiveNlsString(Interpretation interpretation, String value, String charsetName, SqlCollation collation) { + super(value, charsetName, collation); + this.interpretation = interpretation; + } + } + + protected static List transformIntoOrAndClause(List operands, RexBuilder rexBuilder) { + final List disjuncts = new ArrayList<>(operands.size() - 2); + if (operands.get(0).getKind() != SqlKind.ROW) { + final RexNode columnExpression = operands.get(0); + for (int i = 1; i < operands.size(); i++) { + disjuncts.add(rexBuilder.makeCall( + SqlStdOperatorTable.EQUALS, + columnExpression, + operands.get(i))); + } + } else { + final RexCall columnExpressions = (RexCall) operands.get(0); + for (int i = 1; i < operands.size(); i++) { + List conjuncts = new ArrayList<>(columnExpressions.getOperands().size() - 1); + RexCall valueExpressions = (RexCall) operands.get(i); + for (int j = 0; j < columnExpressions.getOperands().size(); j++) { + conjuncts.add(rexBuilder.makeCall( + SqlStdOperatorTable.EQUALS, + columnExpressions.getOperands().get(j), + valueExpressions.getOperands().get(j))); + } + if (conjuncts.size() > 1) { + disjuncts.add(rexBuilder.makeCall( + SqlStdOperatorTable.AND, + conjuncts)); + } else { + disjuncts.add(conjuncts.get(0)); + } + } + } + return disjuncts; + } + + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java new file mode 100644 index 0000000000..8f91ee8841 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeTypeCheck.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse.type; + +import com.google.common.collect.ImmutableBiMap; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +public class RexNodeTypeCheck { + + /** + * Given an AST expression and a context, it will produce a map from AST nodes + * to Calcite RexNode. 
+ */ + public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) + throws SemanticException { + TypeCheckProcFactory factory = + new TypeCheckProcFactory<>(new RexNodeExprFactory(tcCtx.getRexBuilder())); + return factory.genExprNode(expr, tcCtx); + } + + /** + * Returns the default processor to generate Calcite RexNode from AST nodes. + */ + public static TypeCheckProcFactory.DefaultExprProcessor getExprNodeDefaultExprProcessor(RexBuilder rexBuilder) { + TypeCheckProcFactory factory = + new TypeCheckProcFactory<>(new RexNodeExprFactory(rexBuilder)); + return factory.getDefaultExprProcessor(); + } + + /** + * Given an AST join expression and a context, it will produce a map from AST nodes + * to Calcite RexNode. + */ + public static Map genExprNodeJoinCond(ASTNode expr, TypeCheckCtx tcCtx, RexBuilder rexBuilder) + throws SemanticException { + JoinCondTypeCheckProcFactory typeCheckProcFactory = + new JoinCondTypeCheckProcFactory<>(new RexNodeExprFactory(rexBuilder)); + return typeCheckProcFactory.genExprNode(expr, tcCtx); + } + + /** + * Transforms column information into the corresponding Calcite RexNode. + */ + public static RexNode toExprNode(ColumnInfo columnInfo, RowResolver rowResolver, int offset, RexBuilder rexBuilder) + throws SemanticException { + RexNodeExprFactory factory = new RexNodeExprFactory(rexBuilder); + return factory.toExpr(columnInfo, rowResolver, offset); + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckCtx.java index 67875cc31f..d4ef3faec0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckCtx.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse.type; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexBuilder; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.RowResolver; @@ -93,6 +94,8 @@ private final boolean allowSubQueryExpr; + private RexBuilder rexBuilder; + /** * Constructor. 
* @@ -103,11 +106,27 @@ public TypeCheckCtx(RowResolver inputRR) { this(inputRR, true, false); } + public TypeCheckCtx(RowResolver inputRR, RexBuilder rexBuilder) { + this(inputRR, rexBuilder, true, false); + } + public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean foldExpr) { this(inputRR, useCaching, foldExpr, false, true, true, true, true, true, true, true); } + public TypeCheckCtx(RowResolver inputRR, RexBuilder rexBuilder, boolean useCaching, boolean foldExpr) { + this(inputRR, rexBuilder, useCaching, foldExpr, false, true, true, true, true, true, true, true); + } + public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean foldExpr, + boolean allowStatefulFunctions, boolean allowDistinctFunctions, boolean allowGBExprElimination, + boolean allowAllColRef, boolean allowFunctionStar, boolean allowWindowing, + boolean allowIndexExpr, boolean allowSubQueryExpr) { + this(inputRR, null, useCaching, foldExpr, allowStatefulFunctions, allowDistinctFunctions, allowGBExprElimination, + allowAllColRef, allowFunctionStar, allowWindowing, allowIndexExpr, allowSubQueryExpr); + } + + public TypeCheckCtx(RowResolver inputRR, RexBuilder rexBuilder, boolean useCaching, boolean foldExpr, boolean allowStatefulFunctions, boolean allowDistinctFunctions, boolean allowGBExprElimination, boolean allowAllColRef, boolean allowFunctionStar, boolean allowWindowing, boolean allowIndexExpr, boolean allowSubQueryExpr) { @@ -125,6 +144,7 @@ public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean foldExpr, this.allowSubQueryExpr = allowSubQueryExpr; this.outerRR = null; this.subqueryToRelNode = null; + this.rexBuilder = rexBuilder; } /** @@ -271,4 +291,8 @@ public boolean isFoldExpr() { public boolean isCBOExecuted() { return foldExpr; } + + public RexBuilder getRexBuilder() { + return rexBuilder; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java index 4615fc5729..1becfd5075 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java @@ -28,7 +28,9 @@ import java.util.Stack; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionInfo; @@ -54,9 +56,9 @@ import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.SubqueryType; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -610,8 +612,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, boolean isTableAlias = input.hasTableAlias(tableOrCol); ColumnInfo colInfo = null; + RowResolver usedRR = null; + int offset = 0; try { colInfo = input.get(null, tableOrCol); + usedRR = input; } catch (SemanticException 
semanticException) { if (!isTableAlias || parent == null || parent.getType() != HiveParser.DOT) { throw semanticException; @@ -622,6 +627,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, RowResolver outerRR = ctx.getOuterRR(); isTableAlias = outerRR.hasTableAlias(tableOrCol); colInfo = outerRR.get(null, tableOrCol); + usedRR = outerRR; + offset = input.getColumnInfos().size(); } if (isTableAlias) { @@ -631,7 +638,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } // It's a column. - return exprFactory.toExpr(colInfo); + return exprFactory.toExpr(colInfo, usedRR, offset); } else { // It's a table alias. // We will process that later in DOT. @@ -665,7 +672,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } else { // It's a column. - return exprFactory.toExpr(colInfo); + return exprFactory.toExpr(colInfo, usedRR, offset); } } } @@ -714,7 +721,7 @@ protected boolean isRedundantConversionFunction(ASTNode expr, * @throws UDFArgumentException */ public T getFuncExprNodeDescWithUdfData(String udfName, TypeInfo typeInfo, - T... children) throws UDFArgumentException { + T... children) throws SemanticException { FunctionInfo fi; try { @@ -742,10 +749,10 @@ public T getFuncExprNodeDescWithUdfData(String udfName, TypeInfo typeInfo, List childrenList = new ArrayList<>(children.length); childrenList.addAll(Arrays.asList(children)); - return exprFactory.createFuncCallExpr(genericUDF, null, childrenList); + return exprFactory.createFuncCallExpr(genericUDF, udfName, childrenList); } - public T getFuncExprNodeDesc(String udfName, T... children) throws UDFArgumentException { + public T getFuncExprNodeDesc(String udfName, T... children) throws SemanticException { return getFuncExprNodeDescWithUdfData(udfName, null, children); } @@ -768,6 +775,29 @@ public T createConversionCast(T column, PrimitiveTypeInfo tableFieldTypeInfo) protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi, List children, GenericUDF genericUDF) throws SemanticException { + // Check if a bigint is implicitely cast to a double as part of a comparison + // Perform the check here instead of in GenericUDFBaseCompare to guarantee it is only run once per operator + if (genericUDF instanceof GenericUDFBaseCompare && children.size() == 2) { + TypeInfo oiTypeInfo0 = exprFactory.getTypeInfo(children.get(0)); + TypeInfo oiTypeInfo1 = exprFactory.getTypeInfo(children.get(1)); + + SessionState ss = SessionState.get(); + Configuration conf = (ss != null) ? 
ss.getConf() : new Configuration(); + + LogHelper console = new LogHelper(LOG); + + // For now, if a bigint is going to be cast to a double throw an error or warning + if ((oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || + (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.stringTypeInfo))) { + String error = StrictChecks.checkTypeSafety(conf); + if (error != null) throw new UDFArgumentException(error); + console.printError("WARNING: Comparing a bigint and a string may result in a loss of precision."); + } else if ((oiTypeInfo0.equals(TypeInfoFactory.doubleTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || + (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.doubleTypeInfo))) { + console.printError("WARNING: Comparing a bigint and a double may result in a loss of precision."); + } + } + // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't // supported if (fi.getGenericUDTF() != null) { @@ -801,17 +831,17 @@ protected void insertCast(String funcText, List children) throws SemanticExce } } - protected T getXpathOrFuncExprNodeDesc(ASTNode expr, + protected T getXpathOrFuncExprNodeDesc(ASTNode node, boolean isFunction, List children, TypeCheckCtx ctx) throws SemanticException, UDFArgumentException { // return the child directly if the conversion is redundant. - if (isRedundantConversionFunction(expr, isFunction, children)) { + if (isRedundantConversionFunction(node, isFunction, children)) { assert (children.size() == 1); assert (children.get(0) != null); return children.get(0); } - String funcText = getFunctionText(expr, isFunction); - T desc; + String funcText = getFunctionText(node, isFunction); + T expr; if (funcText.equals(".")) { // "." 
: FIELD Expression @@ -821,7 +851,7 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, T object = children.get(0); // Calculate result TypeInfo - String fieldNameString = exprFactory.getConstantValue(children.get(1)).toString(); + String fieldNameString = exprFactory.getConstantValueAsString(children.get(1)); TypeInfo objectTypeInfo = exprFactory.getTypeInfo(object); // Allow accessing a field of list element structs directly from a list @@ -831,19 +861,19 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, } if (objectTypeInfo.getCategory() != Category.STRUCT) { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_DOT.getMsg(), expr)); + ErrorMsg.INVALID_DOT.getMsg(), node)); } TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString); if (isList) { t = TypeInfoFactory.getListTypeInfo(t); } - desc = exprFactory.createNestedColumnRefExpr(t, children.get(0), fieldNameString, isList); + expr = exprFactory.createNestedColumnRefExpr(t, children.get(0), fieldNameString, isList); } else if (funcText.equals("[")) { // "[]" : LSQUARE/INDEX Expression if (!ctx.getallowIndexExpr()) { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_FUNCTION.getMsg(), expr)); + ErrorMsg.INVALID_FUNCTION.getMsg(), node)); } assert (children.size() == 2); @@ -856,24 +886,24 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, if (!TypeInfoUtils.implicitConvertible(exprFactory.getTypeInfo(children.get(1)), TypeInfoFactory.intTypeInfo)) { throw new SemanticException(SemanticAnalyzer.generateErrorMessage( - expr, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg())); + node, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg())); } // Calculate TypeInfo TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo(); - desc = exprFactory.createFuncCallExpr(t, FunctionRegistry.getGenericUDFForIndex(), children); + expr = exprFactory.createFuncCallExpr(t, FunctionRegistry.getGenericUDFForIndex(), children); } else if (myt.getCategory() == Category.MAP) { if (!TypeInfoUtils.implicitConvertible(exprFactory.getTypeInfo(children.get(1)), ((MapTypeInfo) myt).getMapKeyTypeInfo())) { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(), expr)); + ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(), node)); } // Calculate TypeInfo TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo(); - desc = exprFactory.createFuncCallExpr(t, FunctionRegistry.getGenericUDFForIndex(), children); + expr = exprFactory.createFuncCallExpr(t, FunctionRegistry.getGenericUDFForIndex(), children); } else { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.NON_COLLECTION_TYPE.getMsg(), expr, myt.getTypeName())); + ErrorMsg.NON_COLLECTION_TYPE.getMsg(), node, myt.getTypeName())); } } else { // other operators or functions @@ -882,10 +912,10 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, if (fi == null) { if (isFunction) { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_FUNCTION.getMsg(), (ASTNode) expr.getChild(0))); + ErrorMsg.INVALID_FUNCTION.getMsg(), (ASTNode) node.getChild(0))); } else { throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_FUNCTION.getMsg(), expr)); + ErrorMsg.INVALID_FUNCTION.getMsg(), node)); } } @@ -894,12 +924,12 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, if (!fi.isNative()) { ctx.getUnparseTranslator().addIdentifierTranslation( - (ASTNode) expr.getChild(0)); + (ASTNode) node.getChild(0)); } // Handle type casts that may contain type parameters if (isFunction) { - ASTNode funcNameNode = 
(ASTNode) expr.getChild(0); + ASTNode funcNameNode = (ASTNode) node.getChild(0); switch (funcNameNode.getType()) { case HiveParser.TOK_CHAR: // Add type params @@ -941,25 +971,24 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, insertCast(funcText, children); - validateUDF(expr, isFunction, ctx, fi, children, genericUDF); + validateUDF(node, isFunction, ctx, fi, children, genericUDF); // Try to infer the type of the constant only if there are two // nodes, one of them is column and the other is numeric const if (genericUDF instanceof GenericUDFBaseCompare && children.size() == 2 - && ((children.get(0) instanceof ExprNodeConstantDesc - && children.get(1) instanceof ExprNodeColumnDesc) - || (children.get(0) instanceof ExprNodeColumnDesc - && children.get(1) instanceof ExprNodeConstantDesc))) { + && ((exprFactory.isConstantExpr(children.get(0)) + && exprFactory.isColumnRefExpr(children.get(1))) + || (exprFactory.isColumnRefExpr(children.get(0)) + && exprFactory.isConstantExpr(children.get(1))))) { - int constIdx = children.get(0) instanceof ExprNodeConstantDesc ? 0 : 1; + int constIdx = exprFactory.isConstantExpr(children.get(0)) ? 0 : 1; T constChild = children.get(constIdx); T columnChild = children.get(1 - constIdx); - final PrimitiveTypeInfo colTypeInfo = - TypeInfoFactory.getPrimitiveTypeInfo(exprFactory.getTypeInfo(columnChild).getTypeName().toLowerCase()); - T newChild = interpretNodeAs(colTypeInfo, constChild); + final PrimitiveTypeInfo colTypeInfo = (PrimitiveTypeInfo) exprFactory.getTypeInfo(columnChild); + T newChild = interpretNodeAsConstant(colTypeInfo, constChild); if (newChild == null) { // non-interpretable as target type... // TODO: all comparisons with null should result in null @@ -972,56 +1001,44 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, } } if (genericUDF instanceof GenericUDFIn) { - T columnDesc = children.get(0); - List outputOpList = children.subList(1, children.size()); - List inOperands = new ArrayList<>(outputOpList); - outputOpList.clear(); - - boolean hasNullValue = false; - for (T oldChild : inOperands) { - if (oldChild == null) { - hasNullValue = true; + final TypeInfo targetType = exprFactory.getTypeInfo(columnDesc); + List expressions = new ArrayList<>(); + expressions.add(columnDesc); + for (int i = 1; i < children.size(); i++) { + if (children.get(i) == null) { + T nullConst = exprFactory.createConstantExpr(targetType, null); + expressions.add(nullConst); continue; } - T newChild = interpretNodeAsStruct(columnDesc, oldChild); - if (newChild == null) { - hasNullValue = true; - continue; - } - outputOpList.add(newChild); + T newChild = interpretNodeAsConstantStruct(columnDesc, children.get(i)); + expressions.add(newChild); } - if (hasNullValue) { - T nullConst = exprFactory.createConstantExpr(exprFactory.getTypeInfo(columnDesc), null); - if (outputOpList.size() == 0) { - // we have found only null values...remove the IN ; it will be null all the time. - return nullConst; - } - outputOpList.add(nullConst); + HiveConf conf; + try { + conf = ctx.isCBOExecuted() ? 
null : Hive.get().getConf(); + } catch (HiveException e) { + throw new SemanticException(e); } - - if (!ctx.isCBOExecuted()) { - - HiveConf conf; - try { - conf = Hive.get().getConf(); - } catch (HiveException e) { - throw new SemanticException(e); - } - if (children.size() <= HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES)) { - List orOperands = exprFactory.rewriteINIntoORFuncCallExpr(children); - if (orOperands != null) { - if (orOperands.size() == 1) { - orOperands.add(exprFactory.createBooleanConstantExpr(Boolean.FALSE.toString())); - } - funcText = "or"; - genericUDF = new GenericUDFOPOr(); - children.clear(); - children.addAll(orOperands); + children.clear(); + List newOperands = expressions; + String newExprFuncText = "in"; + GenericUDF newExprGenericUDF = new GenericUDFIn(); + if (conf != null && expressions.size() <= HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVEOPT_TRANSFORM_IN_MAXNODES)) { + List orOperands = exprFactory.rewriteINIntoORFuncCallExpr(expressions); + if (orOperands != null) { + if (orOperands.size() == 1) { + orOperands.add(exprFactory.createBooleanConstantExpr(Boolean.FALSE.toString())); } + newOperands = orOperands; + newExprFuncText = "or"; + newExprGenericUDF = new GenericUDFOPOr(); } } + children.addAll(newOperands); + funcText = newExprFuncText; + genericUDF = newExprGenericUDF; } if (genericUDF instanceof GenericUDFOPOr) { // flatten OR @@ -1036,7 +1053,7 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, childrenList.add(child); } } - desc = exprFactory.createFuncCallExpr(genericUDF, funcText, childrenList); + expr = exprFactory.createFuncCallExpr(genericUDF, funcText, childrenList); } else if (genericUDF instanceof GenericUDFOPAnd) { // flatten AND List childrenList = new ArrayList<>(children.size()); @@ -1050,111 +1067,127 @@ protected T getXpathOrFuncExprNodeDesc(ASTNode expr, childrenList.add(child); } } - desc = exprFactory.createFuncCallExpr(genericUDF, funcText, childrenList); + expr = exprFactory.createFuncCallExpr(genericUDF, funcText, childrenList); } else if (ctx.isFoldExpr() && exprFactory.canConvertCASEIntoCOALESCEFuncCallExpr(genericUDF, children)) { // Rewrite CASE into COALESCE - desc = exprFactory.createFuncCallExpr(new GenericUDFCoalesce(), null, + expr = exprFactory.createFuncCallExpr(new GenericUDFCoalesce(), "coalesce", Lists.newArrayList(children.get(0), exprFactory.createBooleanConstantExpr(Boolean.FALSE.toString()))); if (Boolean.FALSE.equals(exprFactory.getConstantValue(children.get(1)))) { - desc = exprFactory.createFuncCallExpr(new GenericUDFOPNot(), null, Lists.newArrayList(desc)); + expr = exprFactory.createFuncCallExpr(new GenericUDFOPNot(), "not", Lists.newArrayList(expr)); } } else { - desc = exprFactory.createFuncCallExpr(genericUDF, funcText, children); + expr = exprFactory.createFuncCallExpr(genericUDF, funcText, children); } // If the function is deterministic and the children are constants, // we try to fold the expression to remove e.g. cast on constant - if (ctx.isFoldExpr() && exprFactory.isFuncCallExpr(desc) && + if (ctx.isFoldExpr() && exprFactory.isFuncCallExpr(expr) && FunctionRegistry.isConsistentWithinQuery(genericUDF) && exprFactory.isAllConstants(children)) { - T constantExpr = exprFactory.foldExpr(desc); + T constantExpr = exprFactory.foldExpr(expr); if (constantExpr != null) { - desc = constantExpr; + expr = constantExpr; } } } - // UDFOPPositive is a no-op. - // However, we still create it, and then remove it here, to make sure we - // only allow - // "+" for numeric types. 
- if (exprFactory.isPOSITIVEFuncCallExpr(desc)) { - assert (exprFactory.getExprChildren(desc).size() == 1); - desc = exprFactory.getExprChildren(desc).get(0); + + if (exprFactory.isPOSITIVEFuncCallExpr(expr)) { + // UDFOPPositive is a no-op. + assert (exprFactory.getExprChildren(expr).size() == 1); + expr = exprFactory.getExprChildren(expr).get(0); + } else if (exprFactory.isNEGATIVEFuncCallExpr(expr)) { + // UDFOPNegative should always be folded. + assert (exprFactory.getExprChildren(expr).size() == 1); + T input = exprFactory.getExprChildren(expr).get(0); + if (exprFactory.isConstantExpr(input)) { + T constantExpr = exprFactory.foldExpr(expr); + if (constantExpr != null) { + expr = constantExpr; + } + } } - assert (desc != null); - return desc; + assert (expr != null); + return expr; } /** * Interprets the given value as columnDesc if possible */ - private T interpretNodeAsStruct(T columnDesc, T valueDesc) + private T interpretNodeAsConstantStruct(T columnDesc, T valueDesc) throws SemanticException { if (exprFactory.isColumnRefExpr(columnDesc)) { final PrimitiveTypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo(exprFactory.getTypeInfo(columnDesc).getTypeName().toLowerCase()); - return interpretNodeAs(typeInfo, valueDesc); - } - if (exprFactory.isSTRUCTFuncCallExpr(columnDesc) && exprFactory.isConstantStruct(valueDesc)) { - List columnChilds = exprFactory.getExprChildren(columnDesc); - ExprNodeConstantDesc valueConstDesc = (ExprNodeConstantDesc) valueDesc; - StructTypeInfo structTypeInfo = (StructTypeInfo) valueConstDesc.getTypeInfo(); - ArrayList structFieldInfos = structTypeInfo.getAllStructFieldTypeInfos(); - ArrayList newStructFieldInfos = new ArrayList<>(); - - if (columnChilds.size() != structFieldInfos.size()) { - throw new SemanticException(ErrorMsg.INCOMPATIBLE_STRUCT.getMsg(columnChilds + " and " + structFieldInfos)); - } - List oldValues = (List) valueConstDesc.getValue(); - List newValues = new ArrayList<>(); - for (int i = 0; i < columnChilds.size(); i++) { - newStructFieldInfos.add(exprFactory.getTypeInfo(columnChilds.get(i))); - Object newValue = exprFactory.interpretConstantAsPrimitive( - (PrimitiveTypeInfo) exprFactory.getTypeInfo(columnChilds.get(i)), - oldValues.get(i), - (PrimitiveTypeInfo) structFieldInfos.get(i)); - newValues.add(newValue); - } - StructTypeInfo sti = new StructTypeInfo(); - sti.setAllStructFieldTypeInfos(newStructFieldInfos); - sti.setAllStructFieldNames(structTypeInfo.getAllStructFieldNames()); - return exprFactory.createConstantExpr(sti, newValues); + return interpretNodeAsConstant(typeInfo, valueDesc); + } + boolean columnStruct = exprFactory.isSTRUCTFuncCallExpr(columnDesc); + if (columnStruct) { + boolean constantValuesStruct = exprFactory.isConstantStruct(valueDesc); + boolean valuesStruct = exprFactory.isSTRUCTFuncCallExpr(valueDesc); + if (constantValuesStruct || valuesStruct) { + List columnChilds = exprFactory.getExprChildren(columnDesc); + List structFieldInfos = exprFactory.getStructTypeInfoList(valueDesc); + List structFieldNames = exprFactory.getStructNameList(valueDesc); + + if (columnChilds.size() != structFieldInfos.size()) { + throw new SemanticException(ErrorMsg.INCOMPATIBLE_STRUCT.getMsg(columnChilds + " and " + structFieldInfos)); + } - } - if (exprFactory.isSTRUCTFuncCallExpr(columnDesc) && exprFactory.isSTRUCTFuncCallExpr(valueDesc)) { - List columnChilds = exprFactory.getExprChildren(columnDesc); - List valueChilds = exprFactory.getExprChildren(valueDesc); - if (columnChilds.size() != valueChilds.size()) { - throw new 
SemanticException(ErrorMsg.INCOMPATIBLE_STRUCT.getMsg(columnChilds + " and " + valueChilds)); - } - List oldValueChilds = new ArrayList<>(valueChilds); - valueChilds.clear(); - for (int i = 0; i < oldValueChilds.size(); i++) { - T newValue = interpretNodeAsStruct(columnChilds.get(i), oldValueChilds.get(i)); - valueChilds.add(newValue); + if (constantValuesStruct) { + List newStructFieldInfos = new ArrayList<>(); + List oldValues = (List) exprFactory.getConstantValue(valueDesc); + List newValues = new ArrayList<>(); + for (int i = 0; i < columnChilds.size(); i++) { + newStructFieldInfos.add(exprFactory.getTypeInfo(columnChilds.get(i))); + Object newValue = exprFactory.interpretConstantAsPrimitive( + (PrimitiveTypeInfo) exprFactory.getTypeInfo(columnChilds.get(i)), + oldValues.get(i), + (PrimitiveTypeInfo) structFieldInfos.get(i)); + newValues.add(newValue); + } + StructTypeInfo structTypeInfo = new StructTypeInfo(); + structTypeInfo.setAllStructFieldTypeInfos(new ArrayList<>(newStructFieldInfos)); + structTypeInfo.setAllStructFieldNames(new ArrayList<>(structFieldNames)); + return exprFactory.createConstantExpr(structTypeInfo, newValues); + } else { // valuesStruct + List newStructFieldInfos = new ArrayList<>(); + List valueChilds = exprFactory.getExprChildren(valueDesc); + List newValueChilds = new ArrayList<>(); + for (int i = 0; i < columnChilds.size(); i++) { + newStructFieldInfos.add(exprFactory.getTypeInfo(columnChilds.get(i))); + T newValue = interpretNodeAsConstantStruct(columnChilds.get(i), valueChilds.get(i)); + newValueChilds.add(newValue); + } + StructTypeInfo structTypeInfo = new StructTypeInfo(); + structTypeInfo.setAllStructFieldTypeInfos(new ArrayList<>(newStructFieldInfos)); + structTypeInfo.setAllStructFieldNames(new ArrayList<>(structFieldNames)); + return exprFactory.createStructExpr(structTypeInfo, newValueChilds); + } } } return valueDesc; } @VisibleForTesting - protected T interpretNodeAs(PrimitiveTypeInfo colTypeInfo, T constChild) { + protected T interpretNodeAsConstant(PrimitiveTypeInfo targetType, T constChild) throws SemanticException { if (exprFactory.isConstantExpr(constChild)) { // Try to narrow type of constant Object constVal = exprFactory.getConstantValue(constChild); if (constVal == null) { // adjust type of null - return exprFactory.createConstantExpr(colTypeInfo, null); + return exprFactory.createConstantExpr(targetType, null); } + PrimitiveTypeInfo sourceType = + (PrimitiveTypeInfo) exprFactory.getTypeInfo(constChild); Object newConst = exprFactory.interpretConstantAsPrimitive( - colTypeInfo, constVal, (PrimitiveTypeInfo) exprFactory.getTypeInfo(constChild)); + targetType, constVal, sourceType); if (newConst == null) { return null; } if (newConst == constVal) { return constChild; } else { - return exprFactory.createConstantExpr(exprFactory.adjustConstantType(colTypeInfo, newConst), newConst); + return exprFactory.createConstantExpr(exprFactory.adjustConstantType(targetType, newConst), newConst); } } return constChild; @@ -1179,7 +1212,7 @@ private boolean isDescendant(Node ans, Node des) { } protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, - Object... nodeOutputs) throws SemanticException { + Object... 
nodeOutputs) throws SemanticException { RowResolver input = ctx.getInputRR(); String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) .getText()); @@ -1188,18 +1221,23 @@ protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, T desc = (T) nodeOutputs[1]; String colName; if (exprFactory.isConstantExpr(desc)) { - colName = exprFactory.getConstantValue(desc).toString(); + colName = exprFactory.getConstantValueAsString(desc); } else if (exprFactory.isColumnRefExpr(desc)) { - colName = exprFactory.getColumnName(desc); + colName = exprFactory.getColumnName(desc, input); } else { throw new SemanticException("Unexpected ExprNode : " + nodeOutputs[1]); } + ColumnInfo colInfo = input.get(tableAlias, colName); + RowResolver usedRR = input; + int offset = 0; // Try outer Row resolver if (colInfo == null && ctx.getOuterRR() != null) { RowResolver outerRR = ctx.getOuterRR(); colInfo = outerRR.get(tableAlias, colName); + usedRR = outerRR; + offset = input.getColumnInfos().size(); } if (colInfo == null) { @@ -1207,7 +1245,7 @@ protected T processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, ErrorMsg.INVALID_COLUMN.getMsg(), expr.getChild(1)), expr); return null; } - return exprFactory.toExpr(colInfo); + return exprFactory.toExpr(colInfo, usedRR, offset); } @Override @@ -1300,14 +1338,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (Map.Entry colMap : columns.entrySet()) { ColumnInfo colInfo = colMap.getValue(); if (!colInfo.getIsVirtualCol()) { - columnList = exprFactory.addExprToExprsList(columnList, exprFactory.toExpr(colInfo)); + exprFactory.addExprToExprsList(columnList, exprFactory.toExpr(colInfo, input, 0)); } } } else { // all columns (select *, for example) for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { - columnList = exprFactory.addExprToExprsList(columnList, exprFactory.toExpr(colInfo)); + exprFactory.addExprToExprsList(columnList, exprFactory.toExpr(colInfo, input, 0)); } } } @@ -1361,7 +1399,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, RowResolver input = ctx.getInputRR(); for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { - children.add(exprFactory.toExpr(colInfo)); + children.add(exprFactory.toExpr(colInfo, input, 0)); } } } @@ -1520,14 +1558,18 @@ private T processGByExpr(Node nd, Object procCtx) throws SemanticException { // If the current subExpression is pre-calculated, as in Group-By etc. 
ColumnInfo colInfo = input.getExpression(expr); + RowResolver usedRR = input; + int offset = 0; // try outer row resolver RowResolver outerRR = ctx.getOuterRR(); if (colInfo == null && outerRR != null) { colInfo = outerRR.getExpression(expr); + usedRR = outerRR; + offset = input.getColumnInfos().size(); } if (colInfo != null) { - desc = exprFactory.createColumnRefExpr(colInfo); + desc = exprFactory.createColumnRefExpr(colInfo, usedRR, offset); ASTNode source = input.getExpressionSource(expr); if (source != null && ctx.getUnparseTranslator() != null) { ctx.getUnparseTranslator().addCopyTranslation(expr, source); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 26a74c2af3..bd92730ae9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -664,17 +664,21 @@ public static boolean isNullConstant(ExprNodeDesc value) { public static PrimitiveTypeInfo deriveMinArgumentCast( ExprNodeDesc childExpr, TypeInfo targetType) { + return deriveMinArgumentCast(childExpr.getTypeInfo(), targetType); + } + + public static PrimitiveTypeInfo deriveMinArgumentCast( + TypeInfo childTi, TypeInfo targetType) { assert targetType instanceof PrimitiveTypeInfo : "Not a primitive type" + targetType; PrimitiveTypeInfo pti = (PrimitiveTypeInfo)targetType; // We only do the minimum cast for decimals. Other types are assumed safe; fix if needed. // We also don't do anything for non-primitive children (maybe we should assert). if ((pti.getPrimitiveCategory() != PrimitiveCategory.DECIMAL) - || (!(childExpr.getTypeInfo() instanceof PrimitiveTypeInfo))) { + || (!(childTi instanceof PrimitiveTypeInfo))) { return pti; } - PrimitiveTypeInfo childTi = (PrimitiveTypeInfo)childExpr.getTypeInfo(); // If the child is also decimal, no cast is needed (we hope - can target type be narrower?). - return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory(childTi); + return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory((PrimitiveTypeInfo) childTi); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index 1a46cacd4c..35ac545c89 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -232,30 +232,6 @@ public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, childrenOIs[i] = children.get(i).getWritableObjectInspector(); } - // Check if a bigint is implicitely cast to a double as part of a comparison - // Perform the check here instead of in GenericUDFBaseCompare to guarantee it is only run once per operator - if (genericUDF instanceof GenericUDFBaseCompare && children.size() == 2) { - - TypeInfo oiTypeInfo0 = children.get(0).getTypeInfo(); - TypeInfo oiTypeInfo1 = children.get(1).getTypeInfo(); - - SessionState ss = SessionState.get(); - Configuration conf = (ss != null) ? 
ss.getConf() : new Configuration(); - - LogHelper console = new LogHelper(LOG); - - // For now, if a bigint is going to be cast to a double throw an error or warning - if ((oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || - (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.stringTypeInfo))) { - String error = StrictChecks.checkTypeSafety(conf); - if (error != null) throw new UDFArgumentException(error); - console.printError("WARNING: Comparing a bigint and a string may result in a loss of precision."); - } else if ((oiTypeInfo0.equals(TypeInfoFactory.doubleTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.longTypeInfo)) || - (oiTypeInfo0.equals(TypeInfoFactory.longTypeInfo) && oiTypeInfo1.equals(TypeInfoFactory.doubleTypeInfo))) { - console.printError("WARNING: Comparing a bigint and a double may result in a loss of precision."); - } - } - ObjectInspector oi = genericUDF.initializeAndFoldConstants(childrenOIs); String[] requiredJars = genericUDF.getRequiredJars(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java index b27dacb4b4..523d1a6db2 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestTypeCheckProcFactory.java @@ -21,6 +21,7 @@ import java.util.Arrays; import java.util.Collection; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory.DefaultExprProcessor; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; @@ -78,63 +79,63 @@ public void init() { testSubject = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor(); } - public void testOneCase(Object constValue) { + public void testOneCase(Object constValue) throws SemanticException { Mockito.when(nodeDesc.getValue()).thenReturn(constValue); Mockito.when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType); - ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc); + ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAsConstant(typeInfo, nodeDesc); Assert.assertNotNull(result); Assert.assertEquals(expectedValue, result.getValue()); } - public void testNullCase(Object constValue) { + public void testNullCase(Object constValue) throws SemanticException { Mockito.when(nodeDesc.getValue()).thenReturn(constValue); Mockito.when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType); - ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc); + ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAsConstant(typeInfo, nodeDesc); Assert.assertNull(result); } @Test - public void testWithSring() { + public void testWithSring() throws SemanticException { testOneCase(maxValue.toString()); } @Test - public void testWithLSuffix() { + public void testWithLSuffix() throws SemanticException { if (intType) { testOneCase(maxValue.toString() + "L"); } } @Test - public void testWithZeroFraction() { + public void testWithZeroFraction() throws SemanticException { if (intType) { testOneCase(maxValue.toString() + ".0"); } } @Test - public void testWithFSuffix() { + public void testWithFSuffix() throws SemanticException { 
testOneCase(maxValue.toString() + "f"); } @Test - public void testWithDSuffix() { + public void testWithDSuffix() throws SemanticException { testOneCase(maxValue.toString() + "D"); } @Test - public void testOverflow() { + public void testOverflow() throws SemanticException { if (intType) { testNullCase(maxValue.add(BigDecimal.valueOf(1L)).toString()); } } @Test - public void testWithNonZeroFraction() { + public void testWithNonZeroFraction() throws SemanticException { if (intType) { testNullCase("100.1"); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java b/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java index ec0bc8915d..93be5a62d1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.CollectDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; @@ -71,7 +72,7 @@ * @return A list of expressions * @throws UDFArgumentException if the UDF has been formulated incorrectly */ - public abstract List getExpressionList() throws UDFArgumentException; + public abstract List getExpressionList() throws SemanticException; /** * This method drives the test. It takes the data from getBaseTable() and diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java index 8519a1265a..e745f31423 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java @@ -21,9 +21,8 @@ import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; -import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.testutil.BaseScalarUdfTest; import org.apache.hadoop.hive.ql.testutil.DataBuilder; @@ -63,7 +62,7 @@ } @Override - public List getExpressionList() throws UDFArgumentException { + public List getExpressionList() throws SemanticException { ExprNodeDesc expr1 = OperatorTestUtils.getStringColumn("a"); ExprNodeDesc expr2 = OperatorTestUtils.getStringColumn("b"); ExprNodeDesc exprDesc2 = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRound.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRound.java index 8c871d5500..ec7233edec 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRound.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRound.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck; import org.junit.Assert; @@ -94,7 +95,7 @@ } @Override - public List getExpressionList() throws UDFArgumentException { + public List getExpressionList() throws SemanticException { List 
exprs = new ArrayList(cols.length); for (int i = 0; i < cols.length; i++) { exprs.add(OperatorTestUtils.getStringColumn(cols[i])); diff --git a/ql/src/test/queries/clientpositive/partition_coltype_literals.q b/ql/src/test/queries/clientpositive/partition_coltype_literals.q index e65371e8c8..9d92278e4a 100644 --- a/ql/src/test/queries/clientpositive/partition_coltype_literals.q +++ b/ql/src/test/queries/clientpositive/partition_coltype_literals.q @@ -1,4 +1,5 @@ --! qt:dataset:src +set hive.strict.checks.type.safety=false; set hive.stats.column.autogather=false; set hive.compute.query.using.stats=false; drop table if exists partcoltypenum; diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out index cd7681c50b..183cc4f8be 100644 --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -160,7 +160,7 @@ POSTHOOK: Input: default@alter_coltype #### A masked pattern was here #### OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alter_coltype` -WHERE `ts` = 3.0 AND `dt` = 100 +WHERE `ts` = 3 AND `dt` = 100 STAGE DEPENDENCIES: Stage-0 is a root stage diff --git a/ql/src/test/results/clientpositive/avrotblsjoin.q.out b/ql/src/test/results/clientpositive/avrotblsjoin.q.out index f6579557aa..3e6969abb8 100644 --- a/ql/src/test/results/clientpositive/avrotblsjoin.q.out +++ b/ql/src/test/results/clientpositive/avrotblsjoin.q.out @@ -72,6 +72,7 @@ POSTHOOK: Output: default@table1_1 POSTHOOK: Lineage: table1_1.col1 SCRIPT [] POSTHOOK: Lineage: table1_1.col2 SCRIPT [] WARNING: Comparing a bigint and a string may result in a loss of precision. +WARNING: Comparing a bigint and a string may result in a loss of precision. 
Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select table1_n1.col1, table1_1.* from table1_n1 join table1_1 on table1_n1.col1=table1_1.col1 where table1_1.col1="1" PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/decimal_precision2.q.out b/ql/src/test/results/clientpositive/decimal_precision2.q.out index 3ec620cb0d..3cba039026 100644 --- a/ql/src/test/results/clientpositive/decimal_precision2.q.out +++ b/ql/src/test/results/clientpositive/decimal_precision2.q.out @@ -175,7 +175,7 @@ STAGE PLANS: Row Limit Per Split: 1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 69.0212249755859375 (type: decimal(27,20)) + expressions: 69.0212249755859375 (type: decimal(29,20)) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out index dda4be8eb1..cebec998a1 100644 --- a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out +++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -44,10 +44,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: lineitem - filterExpr: ((l_shipmode = 'RAIL') and (DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP)))) (type: boolean) + filterExpr: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean) Statistics: Num rows: 100 Data size: 19000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_shipmode = 'RAIL') and (DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP)))) (type: boolean) + predicate: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean) Statistics: Num rows: 7 Data size: 1330 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), (UDFToDouble(l_partkey) / 1000000.0D) (type: double) diff --git a/ql/src/test/results/clientpositive/foldts.q.out b/ql/src/test/results/clientpositive/foldts.q.out index feda88c156..e995f282cf 100644 --- a/ql/src/test/results/clientpositive/foldts.q.out +++ b/ql/src/test/results/clientpositive/foldts.q.out @@ -20,7 +20,7 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint) + expressions: ctimestamp1 (type: timestamp), unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 563568 Basic stats: COMPLETE Column stats: COMPLETE Limit @@ -134,7 +134,7 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) + expressions: from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') (type: string) outputColumnNames: _col0 Statistics: Num rows: 12288 Data size: 2260992 Basic stats: COMPLETE Column stats: COMPLETE Limit diff --git a/ql/src/test/results/clientpositive/infer_const_type.q.out b/ql/src/test/results/clientpositive/infer_const_type.q.out index aacc329480..b636090081 100644 
--- a/ql/src/test/results/clientpositive/infer_const_type.q.out +++ b/ql/src/test/results/clientpositive/infer_const_type.q.out @@ -29,6 +29,7 @@ POSTHOOK: Input: default@infertypes 127 32767 12345 -12345 906.0 -307.0 1234 126 32767 12345 -12345 906.0 -307.0 1234 126 32767 12345 -12345 906.0 -307.0 1.57 +WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE ti = '127' AND si = 32767 AND @@ -85,6 +86,7 @@ STAGE PLANS: Processor Tree: ListSink +WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: SELECT * FROM infertypes WHERE ti = '127' AND si = 32767 AND @@ -108,6 +110,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@infertypes #### A masked pattern was here #### 127 32767 12345 -12345 906.0 -307.0 1234 +WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR @@ -138,6 +141,7 @@ STAGE PLANS: Processor Tree: ListSink +WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR @@ -182,10 +186,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: infertypes - filterExpr: ((ti = 127Y) or (i = -100) or (CAST( si AS decimal(5,0)) = 327)) (type: boolean) + filterExpr: ((ti = 127Y) or (si = 327S) or (i = -100)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ti = 127Y) or (i = -100) or (CAST( si AS decimal(5,0)) = 327)) (type: boolean) + predicate: ((ti = 127Y) or (si = 327S) or (i = -100)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) diff --git a/ql/src/test/results/clientpositive/llap/cbo_limit.q.out b/ql/src/test/results/clientpositive/llap/cbo_limit.q.out index 4ff88b71ec..2fb1ba14f2 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_limit.q.out @@ -179,7 +179,7 @@ HiveFilter(condition=[>($0, 1)]) HiveProject(c_int=[$0]) HiveSortLimit(fetch=[1]) HiveProject(c_int=[$2]) - HiveFilter(condition=[>($3, 1.0E0)]) + HiveFilter(condition=[>($3, 1E0)]) HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) PREHOOK: query: select c_int from (select c_int from cbo_t1 where c_float > 1.0 limit 1) subq where c_int > 1 order by c_int @@ -205,7 +205,7 @@ HiveSortLimit(fetch=[0]) HiveProject(_o__c0=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject($f0=[true]) - HiveFilter(condition=[>($3, 1.0E0)]) + HiveFilter(condition=[>($3, 1E0)]) HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) PREHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 diff --git a/ql/src/test/results/clientpositive/llap/check_constraint.q.out b/ql/src/test/results/clientpositive/llap/check_constraint.q.out index 3ef0744c7b..65b6f0d414 100644 --- a/ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ b/ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -116,7 +116,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: enforce_constraint((((((((- _col0) > (- 
10)) is not false and (_col1 > 10) is not false) and _col2 is not null is not false) and _col3 BETWEEN _col0 AND _col1 is not false) and ((_col4 = 23.4) or (_col4 = 56) or (_col4 = 4)) is not false) and ((_col5 > round(567.6)) and (_col5 < round(1000.4))) is not false)) (type: boolean) + predicate: enforce_constraint((((((((- _col0) > -10) is not false and (_col1 > 10) is not false) and _col2 is not null is not false) and _col3 BETWEEN _col0 AND _col1 is not false) and ((_col4 = 23.4) or (_col4 = 56) or (_col4 = 4)) is not false) and ((_col5 > round(567.6)) and (_col5 < round(1000.4))) is not false)) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean), _col3 (type: int), UDFToFloat(_col4) (type: float), UDFToLong(_col5) (type: bigint) diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index 19238bc173..4e4f087c8e 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -2581,7 +2581,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0]) - HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) + HiveFilter(condition=[>($1, 0)]) HiveAggregate(group=[{1}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 07f0fcdc90..7f294196bf 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -1282,10 +1282,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -1446,10 +1446,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds 
is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -2763,10 +2763,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -2898,10 +2898,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -5108,10 +5108,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) diff --git a/ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out b/ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out index f77fb353b1..0ddb814734 100644 --- a/ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out +++ b/ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out @@ -3594,7 +3594,7 @@ WHERE "sr_item_sk" IS NOT NULL AND "sr_returned_date_sk" IS NOT NULL) AS "t2" ON properties: hive.sql.query SELECT "d_week_seq" FROM "DATE_DIM" -WHERE "d_date" IN (DATE '1998-01-02', DATE '1998-10-15', DATE '1998-11-10') AND "d_week_seq" IS NOT NULL +WHERE "d_date" IN ('1998-01-02', '1998-10-15', '1998-11-10') AND "d_week_seq" IS NOT NULL hive.sql.query.fieldNames d_week_seq hive.sql.query.fieldTypes int hive.sql.query.split true diff --git 
a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out index 7f7df56412..567ee46b9b 100644 --- a/ql/src/test/results/clientpositive/llap/lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out @@ -18,7 +18,7 @@ PREHOOK: query: select * from src1 where key > 10 and value > 'val' order by key PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"e07e602503383cf2b8477d43c5043f35","queryText":"select * from src1 where key > 10 and value > 'val' order by key limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[3,2],"targets":[0,1],"expression":"((src1.value > 'val') and (UDFToDouble(src1.key) > 10.0D))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"src1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"src1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e07e602503383cf2b8477d43c5043f35","queryText":"select * from src1 where key > 10 and value > 'val' order by key limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0,1],"expression":"((UDFToDouble(src1.key) > 10.0D) and (src1.value > 'val'))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"src1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"src1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} 146 val_146 150 val_150 213 val_213 @@ -503,7 +503,7 @@ PREHOOK: query: select * from src1 where length(key) > 2 and value > 'a' PREHOOK: type: QUERY PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"f4a6b14cf6ce3c1313d70720cea4e8b3","queryText":"select * from src1 where length(key) > 2 and value > 'a'","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[3,2],"targets":[0,1],"expression":"((src1.value > 'a') and (length(src1.key) > 2))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"src1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"src1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f4a6b14cf6ce3c1313d70720cea4e8b3","queryText":"select * from src1 where length(key) > 2 and value > 'a'","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0,1],"expression":"((length(src1.key) > 2) and (src1.value > 'a'))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"src1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"src1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"}]} 238 val_238 311 val_311 255 val_255 diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index 18f70927d8..7bfe64c875 100644 
--- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -339,15 +339,15 @@ STAGE PLANS: alias: orc_pred Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((t < 0Y) and (t > -2Y)) (type: boolean) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) - minReductionHashAggr: 0.875 + minReductionHashAggr: 0.96 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -410,18 +410,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - filterExpr: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) + filterExpr: ((t < 0Y) and (t > -2Y)) (type: boolean) Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((t < 0Y) and (t > -2Y)) (type: boolean) + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) - minReductionHashAggr: 0.875 + minReductionHashAggr: 0.96 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -511,7 +511,7 @@ STAGE PLANS: TableScan alias: orc_pred Filter Operator - predicate: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + predicate: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Select Operator expressions: -1Y (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 @@ -541,9 +541,9 @@ STAGE PLANS: Processor Tree: TableScan alias: orc_pred - filterExpr: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + filterExpr: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Filter Operator - predicate: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + predicate: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Select Operator expressions: -1Y (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 @@ -627,7 +627,7 @@ STAGE PLANS: alias: orc_pred Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) 
(type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) @@ -695,10 +695,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index da94fef4a6..83da3258e3 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -344,7 +344,7 @@ STAGE PLANS: TableScan alias: tbl_pred Filter Operator - predicate: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) + predicate: ((t < 0Y) and (t > -2Y)) (type: boolean) Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -378,9 +378,9 @@ STAGE PLANS: Processor Tree: TableScan alias: tbl_pred - filterExpr: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) + filterExpr: ((t < 0Y) and (t > -2Y)) (type: boolean) Filter Operator - predicate: ((t < 0Y) and (UDFToInteger(t) > -2)) (type: boolean) + predicate: ((t < 0Y) and (t > -2Y)) (type: boolean) Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -445,7 +445,7 @@ STAGE PLANS: TableScan alias: tbl_pred Filter Operator - predicate: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + predicate: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Select Operator expressions: -1Y (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 @@ -475,9 +475,9 @@ STAGE PLANS: Processor Tree: TableScan alias: tbl_pred - filterExpr: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + filterExpr: ((t IS NOT DISTINCT FROM -1Y) and s is not null and (s like 'bob%')) (type: boolean) Filter Operator - predicate: ((t IS NOT DISTINCT FROM -1) and s is not null and (s like 'bob%')) (type: boolean) + predicate: ((t IS NOT DISTINCT FROM -1Y) and s is not 
null and (s like 'bob%')) (type: boolean) Select Operator expressions: -1Y (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 @@ -561,7 +561,7 @@ STAGE PLANS: alias: tbl_pred Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) @@ -629,10 +629,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (UDFToInteger(t) <> -1) and (UDFToInteger(t) <> -2) and (UDFToInteger(t) <> -3) and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -1Y) and (t <> -2Y) and (t <> -3Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) diff --git a/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out b/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out index 4a957f57c0..013b951e6a 100644 --- a/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out +++ b/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out @@ -306,13 +306,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: rct_part - filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) = 0.0D)) (type: boolean) + filterExpr: ((UDFToDouble(key) = 0.0D) and (ds = '2008-04-08')) (type: boolean) Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ds = '2008-04-08') and (UDFToDouble(key) = 0.0D)) (type: boolean) - Statistics: Num rows: 2 Data size: 700 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) = 0.0D) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 3 Data size: 1051 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2 Data size: 700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1051 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() minReductionHashAggr: 0.99 diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out b/ql/src/test/results/clientpositive/llap/subquery_views.q.out index a00813c8cc..88c63ae091 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -124,7 +124,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - filterExpr: ((key < '11') or ((key < '11') and (value > 'val_11')) or ((key < '11') and (value > 'val_11'))) (type: 
boolean) + filterExpr: ((key < '11') or ((value > 'val_11') and (key < '11')) or ((value > 'val_11') and (key < '11'))) (type: boolean) properties: insideView TRUE Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -142,7 +142,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and (value > 'val_11')) (type: boolean) + predicate: ((value > 'val_11') and (key < '11')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(key) @@ -159,7 +159,7 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 5238 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Filter Operator - predicate: ((key < '11') and (value > 'val_11')) (type: boolean) + predicate: ((value > 'val_11') and (key < '11')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: string), value (type: string) @@ -187,7 +187,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '11') and (value > 'val_11')) (type: boolean) + predicate: ((value > 'val_11') and (key < '11')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(key) diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index cc72f4546a..7e58042040 100644 --- a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -1131,13 +1131,13 @@ STAGE PLANS: keyExpressions: col 0:int native: true Select Operator - expressions: q548284 (type: int), CASE WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END (type: decimal(2,1)) + expressions: q548284 (type: int), CAST( CASE WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END AS decimal(11,1)) (type: decimal(11,1)) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 8] - selectExpressions: IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(2,1)col 7:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, ConstantVectorExpression(val 0.8) -> 3:decimal(2,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(2,1)col 6:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(2,1), ConstantVectorExpression(val 8) -> 6:decimal(2,1)) -> 7:decimal(2,1)) -> 8:decimal(2,1) + projectedOutputColumnNums: [0, 9] + selectExpressions: CastDecimalToDecimal(col 8:decimal(2,1))(children: IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(2,1)col 7:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, ConstantVectorExpression(val 0.8) -> 3:decimal(2,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(2,1)col 6:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(2,1), 
ConstantVectorExpression(val 8) -> 6:decimal(2,1)) -> 7:decimal(2,1)) -> 8:decimal(2,1)) -> 9:decimal(11,1) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -1148,10 +1148,10 @@ STAGE PLANS: keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 8:decimal(2,1) + valueColumns: 9:decimal(11,1) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: decimal(2,1)) + value expressions: _col1 (type: decimal(11,1)) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -1168,7 +1168,7 @@ STAGE PLANS: includeColumns: [0] dataColumns: q548284:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(2,1), bigint, decimal(2,1), decimal(2,1), decimal(2,1), decimal(2,1)] + scratchColumnTypeNames: [bigint, decimal(2,1), bigint, decimal(2,1), decimal(2,1), decimal(2,1), decimal(2,1), decimal(11,1)] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1181,12 +1181,12 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(2,1) + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(11,1) partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(2,1)) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(11,1)) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator diff --git a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out index 6db296df5a..b183c709ae 100644 --- a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out @@ -197,8 +197,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:int, val 5), FilterLongColEqualLongScalar(col 6:int, val 10), FilterLongColEqualLongScalar(col 6:bigint, val 571)(children: col 6:int)) - predicate: ((_col1 = 5) or (_col5 = 10) or (UDFToLong(_col5) = 571L)) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:int, val 5), FilterLongColumnInList(col 6:int, values [571, 10])) + predicate: ((_col1 = 5) or (_col5) IN (571, 10)) (type: boolean) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col5 (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (UDFToInteger(_col4)) END (type: int) diff --git a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out index 03e277a15b..ae7bd5b3ca 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out @@ -67,7 +67,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint 
AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + filterExpr: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -76,24 +76,24 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimal64ColEqualDecimal64Scalar(col 15:decimal(11,4)/DECIMAL_64, val 97632155639)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(11,4)/DECIMAL_64), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) - predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) + Statistics: Num rows: 11590 Data size: 2232584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] - selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, 
DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 8, 0, 10, 6, 15, 17, 20, 21, 23, 24, 25, 27, 30, 32] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 19:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 1:smallint) -> 18:double) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, DoubleColModuloDoubleColumn(col 22:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 22:double) -> 23:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 24:smallint, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 3:bigint, col 26:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 26:smallint) -> 27:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 29:double)(children: DoubleColAddDoubleColumn(col 5:double, col 28:double)(children: CastLongToDouble(col 1:smallint) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleColUnaryMinus(col 5:double) -> 31:double) -> 32:double + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -114,7 +114,7 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, decimal(11,4)/DECIMAL_64, double, double, double, double, double, double, double, double, double, 
bigint, double, bigint, bigint, double, double, double, double, double] + scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double] Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 02c9d660ba..72cacf868e 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -1689,7 +1689,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) + filterExpr: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -1697,14 +1697,14 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 3:bigint) -> 13:float), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 14:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 14:decimal(7,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 15:double), FilterStringGroupColGreaterEqualStringScalar(col 6:string, val ss), FilterDoubleColNotEqualDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 16:double)), FilterLongColEqualLongScalar(col 0:int, val -89010)(children: col 0:tinyint), FilterExprAndExpr(children: FilterDecimal64ColGreaterDecimal64Scalar(col 17:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 17:decimal(7,2)/DECIMAL_64), FilterStringColLikeStringScalar(col 7:string, pattern ss))) - predicate: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) - Statistics: Num rows: 10922 Data size: 2201730 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 3:bigint) -> 13:float), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 14:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 14:decimal(7,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 
3:bigint) -> 15:double), FilterStringGroupColGreaterEqualStringScalar(col 6:string, val ss), FilterDoubleColNotEqualDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 16:double)), FilterExprAndExpr(children: FilterDecimal64ColGreaterDecimal64Scalar(col 17:decimal(7,2)/DECIMAL_64, val -2628)(children: CastLongToDecimal64(col 1:smallint) -> 17:decimal(7,2)/DECIMAL_64), FilterStringColLikeStringScalar(col 7:string, pattern ss))) + predicate: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) + Statistics: Num rows: 4778 Data size: 963360 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++++++++++++ keys: cboolean1 (type: boolean), cstring1 (type: string), ctimestamp2 (type: timestamp), cfloat (type: float), cbigint (type: bigint), cdouble (type: double), cint (type: int), csmallint (type: smallint), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) null sort order: zzzzzzzzzzzzzzzzzzzzz - Statistics: Num rows: 10922 Data size: 2201730 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778 Data size: 963360 Basic stats: COMPLETE Column stats: COMPLETE top n: 75 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -1718,7 +1718,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 6, 11, 9, 5, 4, 3, 1, 10, 22, 26, 27, 13, 39, 15, 16, 41, 34, 42, 30, 37, 44] selectExpressions: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 22:int, LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 26:bigint, LongColUnaryMinus(col 3:bigint) -> 27:bigint, DoubleColUnaryMinus(col 4:float) -> 13:float, LongColAddLongColumn(col 37:bigint, col 3:bigint)(children: LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 37:bigint) -> 39:bigint, DoubleColDivideDoubleColumn(col 5:double, col 5:double) -> 15:double, DoubleColUnaryMinus(col 5:double) -> 16:double, LongColMultiplyLongColumn(col 37:bigint, col 40:bigint)(children: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 37:int, LongColUnaryMinus(col 3:bigint) -> 40:bigint) -> 41:bigint, DoubleColAddDoubleColumn(col 29:double, col 30:double)(children: DoubleColUnaryMinus(col 5:double) -> 29:double, CastLongToDouble(col 3:bigint) -> 30:double) -> 34:double, DecimalScalarDivideDecimalColumn(val -1.389, col 32:decimal(3,0))(children: CastLongToDecimal(col 0:tinyint) -> 32:decimal(3,0)) -> 42:decimal(8,7), DoubleColModuloDoubleColumn(col 29:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 29:double) -> 30:double, LongColUnaryMinus(col 1:smallint) -> 37:smallint, LongColAddLongColumn(col 1:int, col 43:int)(children: col 1:smallint, LongColAddLongColumn(col 2:int, col 
1:int)(children: col 1:smallint) -> 43:int) -> 44:int - Statistics: Num rows: 10922 Data size: 3012774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778 Data size: 1318066 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) null sort order: zzzzzzzzzzzzzzzzzzzzz @@ -1727,7 +1727,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 10922 Data size: 3012774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778 Data size: 1318066 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized, llap @@ -1757,7 +1757,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6, 1, 21, 2, 5, 3, 4, 7, 0, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 10922 Data size: 3012774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778 Data size: 1318066 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 75 Limit Vectorization: @@ -2009,7 +2009,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (UDFToInteger(csmallint) < -6432)) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) + filterExpr: (((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (csmallint < -6432S)) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) Statistics: Num rows: 12288 Data size: 2403694 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -2017,14 +2017,14 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5:double, col 4:double)(children: col 4:float), FilterStringGroupColLessEqualStringScalar(col 7:string, val a)), FilterExprAndExpr(children: FilterDecimal64ColLessEqualDecimal64Scalar(col 13:decimal(13,3)/DECIMAL_64, val -1389)(children: CastLongToDecimal64(col 2:int) -> 13:decimal(13,3)/DECIMAL_64), FilterLongColLessLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), FilterLongColLessLongScalar(col 1:int, val -6432)(children: col 1:smallint)), FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern ss%), FilterDecimalColLessDecimalScalar(col 14:decimal(22,3), val 10.175)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)))) - predicate: 
(((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (UDFToInteger(csmallint) < -6432)) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) - Statistics: Num rows: 3868 Data size: 756762 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimal64ColLessEqualDecimal64Scalar(col 13:decimal(13,3)/DECIMAL_64, val -1389)(children: CastLongToDecimal64(col 2:int) -> 13:decimal(13,3)/DECIMAL_64), FilterLongColLessLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), FilterLongColLessLongScalar(col 1:smallint, val -6432)), FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5:double, col 4:double)(children: col 4:float), FilterStringGroupColLessEqualStringScalar(col 7:string, val a)), FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern ss%), FilterDecimalColLessDecimalScalar(col 14:decimal(22,3), val 10.175)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)))) + predicate: (((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (csmallint < -6432S)) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) + Statistics: Num rows: 3828 Data size: 749058 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++++++ keys: csmallint (type: smallint), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), (UDFToDouble(cbigint) / 3569.0D) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175D) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) null sort order: zzzzzzzzzzzzzzz - Statistics: Num rows: 3868 Data size: 756762 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3828 Data size: 749058 Basic stats: COMPLETE Column stats: COMPLETE top n: 45 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -2038,7 +2038,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 7, 5, 4, 3, 1, 21, 28, 15, 26, 29, 31, 32, 33, 34, 36] selectExpressions: DoubleColDivideDoubleScalar(col 15:double, val 3569.0)(children: CastLongToDouble(col 3:bigint) -> 15:double) -> 21:double, LongScalarSubtractLongColumn(val -257, col 1:int)(children: col 1:smallint) -> 28:int, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4:float) -> 15:float, DoubleColUnaryMinus(col 5:double) -> 26:double, DoubleColMultiplyDoubleScalar(col 5:double, val 10.175) -> 29:double, DoubleColDivideDoubleColumn(col 30:double, col 4:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4:float) -> 30:float, col 4:float) -> 31:double, DoubleColUnaryMinus(col 4:float) -> 32:float, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 33:int, DoubleColUnaryMinus(col 5:double) -> 34:double, DoubleColMultiplyDoubleColumn(col 5:double, col 35:double)(children: DoubleColUnaryMinus(col 5:double) -> 35:double) -> 36:double - Statistics: Num rows: 3868 Data size: 552696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3828 Data size: 547232 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) null sort order: zzzzzzzzzzzzzzz @@ -2047,7 +2047,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3868 Data size: 552696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3828 Data size: 547232 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: timestamp) Execution mode: vectorized, llap @@ -2077,7 +2077,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14] - Statistics: Num rows: 3868 Data size: 552696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3828 Data size: 547232 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 45 Limit Vectorization: @@ -2271,7 +2271,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((UDFToInteger(ctinyint) <= cint) and (UDFToInteger(csmallint) >= -257) and (UDFToDouble(cint) >= cdouble)) (type: boolean) + filterExpr: ((csmallint >= -257S) and (UDFToInteger(ctinyint) <= cint) and (UDFToDouble(cint) >= cdouble)) (type: boolean) Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -2279,14 +2279,14 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterLongColGreaterEqualLongScalar(col 1:int, val -257)(children: col 1:smallint), FilterDoubleColGreaterEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double)) - predicate: ((UDFToInteger(ctinyint) <= cint) and (UDFToInteger(csmallint) >= -257) and (UDFToDouble(cint) >= cdouble)) (type: boolean) - Statistics: Num rows: 455 Data size: 9548 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1:smallint, val -257), FilterLongColLessEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 13:double)) + predicate: ((csmallint >= -257S) and (UDFToInteger(ctinyint) <= cint) and (UDFToDouble(cint) >= cdouble)) (type: boolean) + Statistics: Num rows: 693 Data size: 14504 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: + keys: csmallint (type: smallint) null sort order: z - Statistics: Num rows: 455 Data size: 9548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 14504 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -2300,7 +2300,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 
0, 13, 16, 17, 20] selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 1:smallint) -> 14:double, CastLongToDouble(col 1:smallint) -> 15:double) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 455 Data size: 9548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 14504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col4), sum(_col3), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count() Group By Vectorization: @@ -2312,10 +2312,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) - minReductionHashAggr: 0.49890107 + minReductionHashAggr: 0.53679657 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 227 Data size: 14980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) null sort order: z @@ -2325,7 +2325,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 227 Data size: 14980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2360,12 +2360,12 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 227 Data size: 14980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++ keys: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col4 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col4)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), ((_col5 - ((_col6 * _col6) / _col7)) / _col7) (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col8 (type: bigint), (_col8 - -89010L) (type: bigint) null sort order: zzzzzzzzzzz - Statistics: Num rows: 227 Data size: 14980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 21180 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -2379,7 +2379,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 15, 10, 34, 4, 16, 31, 12, 30, 8, 36] selectExpressions: 
LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 15:int, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 30:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 10:double) -> 11:double) -> 10:double, IfExprNullCondExpr(col 20:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 20:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 30:bigint) -> 11:double) -> 10:double, DecimalScalarDivideDecimalColumn(val -1.389, col 18:decimal(5,0))(children: CastLongToDecimal(col 0:smallint) -> 18:decimal(5,0)) -> 34:decimal(10,9), DoubleColDivideDoubleColumn(col 11:double, col 12:double)(children: CastLongToDouble(col 30:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 30:int) -> 11:double, CastLongToDouble(col 4:bigint) -> 12:double) -> 16:double, LongColUnaryMinus(col 30:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 30:int) -> 31:int, DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 12:double)(children: DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 11:double) -> 12:double) -> 11:double) -> 12:double, LongColUnaryMinus(col 35:int)(children: LongColUnaryMinus(col 30:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 30:int) -> 35:int) -> 30:int, LongColSubtractLongScalar(col 8:bigint, val -89010) -> 36:bigint - Statistics: Num rows: 227 Data size: 39036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 55196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) null sort order: zzzzzzzzzzz @@ -2388,7 +2388,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 227 Data size: 39036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 55196 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -2406,7 +2406,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 227 Data size: 39036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 321 Data size: 55196 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -2576,30 +2576,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (CAST( ctinyint AS decimal(6,2)) = 2563.58) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS 
decimal(21,2)) < -5638.15)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 293580 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 2563.58), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3:bigint, col 2:bigint)(children: col 2:int), FilterLongColLessLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterDoubleColLessDoubleScalar(col 4:float, val -5638.14990234375)), FilterDecimal64ColEqualDecimal64Scalar(col 13:decimal(6,2)/DECIMAL_64, val 256358)(children: CastLongToDecimal64(col 0:tinyint) -> 13:decimal(6,2)/DECIMAL_64), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 14:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(21,2), val -5638.15)(children: CastLongToDecimal(col 3:bigint) -> 15:decimal(21,2))))) - predicate: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (CAST( ctinyint AS decimal(6,2)) = 2563.58) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) - Statistics: Num rows: 7494 Data size: 179052 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 2563.58), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3:bigint, col 2:bigint)(children: col 2:int), FilterLongColLessLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterDoubleColLessDoubleScalar(col 4:float, val -5638.14990234375)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 3:bigint) -> 13:double), FilterDecimalColLessDecimalScalar(col 14:decimal(21,2), val -5638.15)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(21,2))))) + predicate: ((cdouble > 2563.58D) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15)))) (type: boolean) + Statistics: Num rows: 1362 Data size: 28504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cfloat (type: float), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 4, 16] - selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 16:double - Statistics: Num rows: 7494 Data size: 179052 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 4, 15] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 15:double + Statistics: Num rows: 1362 Data size: 28504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col0), 
count(_col0), count(_col1), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double + aggregators: VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double @@ -2607,10 +2607,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) - minReductionHashAggr: 0.55004 + minReductionHashAggr: 0.54919237 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3372 Data size: 155032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 28232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z @@ -2620,7 +2620,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3372 Data size: 155032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 28232 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2655,7 +2655,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 3372 Data size: 155032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 28232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double), (2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), _col4 (type: bigint), ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D) (type: double), ((- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) * ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D)) (type: double), _col5 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (_col0 - (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END))) (type: double), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (_col0 + ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (_col0 * 762.0D) (type: double), _col2 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2664,7 +2664,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 12, 20, 28, 4, 37, 55, 5, 59, 68, 73, 81, 82, 2, 84] selectExpressions: DoubleColDivideLongColumn(col 8:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 7:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 7:double) -> 8:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 12:double, DoubleScalarMultiplyDoubleColumn(val 2563.58, col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 13:double) -> 14:double) -> 15:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 27:double)(children: DoubleColDivideLongColumn(col 23:double, col 26:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 22:double)(children: DoubleColDivideLongColumn(col 21:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 21:double) -> 22:double) -> 23:double, IfExprNullCondExpr(col 24:boolean, null, col 25:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 24:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 25:bigint) -> 26:bigint) -> 27:double) -> 28:double, DoubleColAddDoubleScalar(col 36:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 35:double)(children: DoubleColDivideLongColumn(col 31:double, col 34:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 29:double) -> 30:double) -> 31:double, IfExprNullCondExpr(col 32:boolean, null, col 33:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 32:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 33:bigint) -> 34:bigint) -> 35:double) -> 36:double) -> 37:double, DoubleColMultiplyDoubleColumn(col 45:double, col 54:double)(children: DoubleColUnaryMinus(col 44:double)(children: DoubleColDivideLongColumn(col 40:double, col 43:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 38:double) -> 39:double) -> 40:double, IfExprNullCondExpr(col 41:boolean, null, col 42:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 41:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 42:bigint) -> 43:bigint) -> 44:double) -> 45:double, DoubleColAddDoubleScalar(col 53:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 52:double)(children: DoubleColDivideLongColumn(col 48:double, col 51:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 47:double)(children: DoubleColDivideLongColumn(col 46:double, 
col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 46:double) -> 47:double) -> 48:double, IfExprNullCondExpr(col 49:boolean, null, col 50:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 49:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 50:bigint) -> 51:bigint) -> 52:double) -> 53:double) -> 54:double) -> 55:double, DoubleColDivideLongColumn(col 58:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 57:double)(children: DoubleColDivideLongColumn(col 56:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 56:double) -> 57:double) -> 58:double) -> 59:double, DoubleColSubtractDoubleColumn(col 0:double, col 67:double)(children: DoubleColUnaryMinus(col 66:double)(children: DoubleColDivideLongColumn(col 62:double, col 65:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 61:double)(children: DoubleColDivideLongColumn(col 60:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 60:double) -> 61:double) -> 62:double, IfExprNullCondExpr(col 63:boolean, null, col 64:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 63:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 64:bigint) -> 65:bigint) -> 66:double) -> 67:double) -> 68:double, FuncPowerDoubleToDouble(col 72:double)(children: DoubleColDivideLongColumn(col 71:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 70:double)(children: DoubleColDivideLongColumn(col 69:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 69:double) -> 70:double) -> 71:double) -> 72:double) -> 73:double, DoubleColAddDoubleColumn(col 0:double, col 80:double)(children: DoubleColDivideLongColumn(col 76:double, col 79:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 75:double)(children: DoubleColDivideLongColumn(col 74:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 74:double) -> 75:double) -> 76:double, IfExprNullCondExpr(col 77:boolean, null, col 78:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 77:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 78:bigint) -> 79:bigint) -> 80:double) -> 81:double, DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 82:double, DoubleScalarModuloDoubleColumn(val -863.257, col 83:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 83:double) -> 84:double - Statistics: Num rows: 3372 Data size: 424792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 77352 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z @@ -2673,7 +2673,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3372 Data size: 424792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 77352 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 
(type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -2691,13 +2691,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] - Statistics: Num rows: 3372 Data size: 424792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 77352 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3372 Data size: 424792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 614 Data size: 77352 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index d7f8f73b9b..a2d437bd9f 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -1180,10 +1180,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -1375,10 +1375,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -2968,10 +2968,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D)) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08')) (type: boolean) Statistics: 
Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -3134,10 +3134,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) @@ -5644,10 +5644,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date = '2008-04-08') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string), hr (type: string) diff --git a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out index bdb876e618..7b2bc2ed64 100644 --- a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -239,10 +239,10 @@ STAGE PLANS: value expressions: _col0 (type: string) TableScan alias: b - filterExpr: ((id = 'Id_2') and (val = 'val_104')) (type: boolean) + filterExpr: ((val = 'val_104') and (id = 'Id_2')) (type: boolean) Statistics: Num rows: 2 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((id = 'Id_2') and (val = 'val_104')) (type: boolean) + predicate: ((val = 'val_104') and (id = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out index d59f9f338d..8b3541c85b 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out @@ -64,7 +64,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + filterExpr: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 
<= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -72,24 +72,24 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimal64ColEqualDecimal64Scalar(col 15:decimal(11,4)/DECIMAL_64, val 97632155639)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(11,4)/DECIMAL_64), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) - predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) + Statistics: Num rows: 11590 Data size: 2232584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] - selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) 
-> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 8, 0, 10, 6, 15, 17, 20, 21, 23, 24, 25, 27, 30, 32] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 19:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 1:smallint) -> 18:double) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, DoubleColModuloDoubleColumn(col 22:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 22:double) -> 23:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 24:smallint, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 3:bigint, col 26:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 26:smallint) -> 27:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 29:double)(children: DoubleColAddDoubleColumn(col 5:double, col 28:double)(children: CastLongToDouble(col 1:smallint) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleColUnaryMinus(col 5:double) -> 31:double) -> 32:double + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out index 13f4e78880..5023927b2e 100644 --- a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -235,6 +235,7 @@ POSTHOOK: Input: default@partcoltypenum POSTHOOK: Input: default@partcoltypenum@tint=110/sint=22000/bint=330000000000 #### A masked pattern was here #### 30 +WARNING: Comparing a bigint and a string may result in a loss of precision. 
PREHOOK: query: select count(1) from partcoltypenum where tint=110Y and sint=22000 and bint='330000000000' PREHOOK: type: QUERY PREHOOK: Input: default@partcoltypenum diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out index d10a0708a4..09b776f60d 100644 --- a/ql/src/test/results/clientpositive/pcs.q.out +++ b/ql/src/test/results/clientpositive/pcs.q.out @@ -1391,7 +1391,7 @@ POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 #### A masked pattern was here #### OPTIMIZED SQL: SELECT `ds` FROM `default`.`pcs_t1` -WHERE ROW(`ds`, `key`, RAND(100)) IN (ROW('2000-04-08', 1, 0.2), ROW('2000-04-09', 2, 0.3)) +WHERE `ds` = '2000-04-08' AND `key` = 1 AND RAND(100) = 0.2 OR `ds` = '2000-04-09' AND `key` = 2 AND RAND(100) = 0.3 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1406,18 +1406,18 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(ds,key,rand(100))) IN (const struct('2000-04-08',1,0.2D), const struct('2000-04-09',2,0.3D)) (type: boolean) - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (((ds = '2000-04-08') and (key = 1) and (rand(100) = 0.2D)) or ((ds = '2000-04-09') and (key = 2) and (rand(100) = 0.3D))) (type: boolean) + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1560,7 +1560,7 @@ POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### OPTIMIZED SQL: SELECT `ds` FROM `default`.`pcs_t1` -WHERE `ds` <> '2000-04-08' AND `key` = 3 OR (`ds` = '2000-04-08' OR `key` IS NOT NULL) AND `key` = 2 +WHERE `ds` <> '2000-04-08' AND `key` <> 2 AND `key` = 3 OR (`ds` = '2000-04-08' OR `key` IS NOT NULL) AND `key` = 2 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1571,12 +1571,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcs_t1 - filterExpr: (((ds <> '2000-04-08') and (key = 3)) or (((ds = '2000-04-08') or key is not null) and (key = 2))) (type: boolean) + filterExpr: (((ds <> '2000-04-08') and (key <> 2) and (key = 3)) or (((ds = '2000-04-08') or key is not null) and (key = 2))) (type: boolean) Statistics: Num rows: 60 Data size: 11280 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds <> '2000-04-08') and (key = 3)) or (((ds = '2000-04-08') or key is not null) and (key = 2))) (type: boolean) + predicate: (((ds <> '2000-04-08') and (key <> 2) and (key = 3)) or (((ds = '2000-04-08') or key is not null) and (key = 2))) (type: boolean) Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) @@ -1796,7 +1796,7 @@ POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### OPTIMIZED SQL: SELECT `ds` FROM `default`.`pcs_t1` 
-WHERE `key` = 3 OR ((`ds` = '2000-04-08' OR `key` IS NOT NULL) AND `key` = 2 OR `ds` <> '2000-04-08' AND `key` = 3) AND `key` + 5 > 0 +WHERE `key` = 3 OR ((`ds` = '2000-04-08' OR `key` IS NOT NULL) AND `key` = 2 OR `ds` <> '2000-04-08' AND `key` <> 2 AND `key` = 3) AND `key` + 5 > 0 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1807,12 +1807,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcs_t1 - filterExpr: ((key = 3) or (((((ds = '2000-04-08') or key is not null) and (key = 2)) or ((ds <> '2000-04-08') and (key = 3))) and ((key + 5) > 0))) (type: boolean) + filterExpr: ((key = 3) or (((((ds = '2000-04-08') or key is not null) and (key = 2)) or ((ds <> '2000-04-08') and (key <> 2) and (key = 3))) and ((key + 5) > 0))) (type: boolean) Statistics: Num rows: 60 Data size: 11280 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key = 3) or (((((ds = '2000-04-08') or key is not null) and (key = 2)) or ((ds <> '2000-04-08') and (key = 3))) and ((key + 5) > 0))) (type: boolean) + predicate: ((key = 3) or (((((ds = '2000-04-08') or key is not null) and (key = 2)) or ((ds <> '2000-04-08') and (key <> 2) and (key = 3))) and ((key + 5) > 0))) (type: boolean) Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/ppd_gby_join.q.out index a9b7593714..51a92a7cc3 100644 --- a/ql/src/test/results/clientpositive/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_gby_join.q.out @@ -33,10 +33,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 6408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -50,10 +50,10 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -307,10 +307,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 6408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -324,10 +324,10 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join.q.out b/ql/src/test/results/clientpositive/ppd_join.q.out index 1117d34abc..e6f28b846a 100644 --- a/ql/src/test/results/clientpositive/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_join.q.out @@ -30,10 +30,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 36 Data size: 6408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -47,10 +47,10 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -560,10 +560,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and ((value < 'val_50') or (key > '2')) and (key <> '4')) (type: boolean) 
Statistics: Num rows: 36 Data size: 6408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -577,10 +577,10 @@ STAGE PLANS: Statistics: Num rows: 36 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + filterExpr: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '20') and (key < '400') and (key <> '4')) (type: boolean) + predicate: ((key < '400') and (key > '20') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join2.q.out b/ql/src/test/results/clientpositive/ppd_join2.q.out index c09fefe225..6380392630 100644 --- a/ql/src/test/results/clientpositive/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/ppd_join2.q.out @@ -37,10 +37,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + filterExpr: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + predicate: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -55,10 +55,10 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) + filterExpr: ((key < '400') and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) + predicate: ((key < '400') and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -103,10 +103,10 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: src - filterExpr: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) + filterExpr: ((sqrt(key) <> 13.0D) and (key <> '306') and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) + predicate: 
((sqrt(key) <> 13.0D) and (key <> '306') and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) @@ -1728,10 +1728,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + filterExpr: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + predicate: ((key < '400') and (key <> '305') and (key <> '302') and (key <> '14') and (key <> '311')) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -1746,10 +1746,10 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) + filterExpr: ((key < '400') and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) + predicate: ((key < '400') and (key <> '302') and (key <> '305') and (key <> '311') and (key <> '14') and ((value <> 'val_50') or (key > '1')) and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -1794,10 +1794,10 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: src - filterExpr: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) + filterExpr: ((sqrt(key) <> 13.0D) and (key <> '306') and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) + predicate: ((sqrt(key) <> 13.0D) and (key <> '306') and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out index 71bf59fbc1..4b1f881e6b 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -37,10 +37,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 43500 
Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -54,10 +54,10 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -97,10 +97,10 @@ STAGE PLANS: Statistics: Num rows: 86 Data size: 7482 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -1783,10 +1783,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '13') and (key <> '11') and (key <> '12') and (key <> '1') and (key <> '4')) (type: boolean) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ 
-1800,10 +1800,10 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '11') and (key <> '12') and (key <> '13') and (key <> '4') and (key <> '1') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -1843,10 +1843,10 @@ STAGE PLANS: Statistics: Num rows: 86 Data size: 7482 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + filterExpr: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + predicate: ((key < '400') and (key > '0') and (key <> '12') and (key <> '11') and (key <> '13') and (key <> '4') and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/ppd_join_filter.q.out index 051e676a0b..7acee17aa3 100644 --- a/ql/src/test/results/clientpositive/ppd_join_filter.q.out +++ b/ql/src/test/results/clientpositive/ppd_join_filter.q.out @@ -36,7 +36,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -386,7 +386,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -736,7 +736,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = 
`t4`.`key` STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -1084,7 +1084,7 @@ INNER JOIN (SELECT `key`, CAST(MIN(`key`) AS DOUBLE) + 2 AS `k2`, CAST(MIN(`key` FROM `default`.`src` WHERE `key` IS NOT NULL GROUP BY `key` -HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5.0) AS `t4` ON `t0`.`key` = `t4`.`key` +HAVING CAST(MIN(`key`) AS DOUBLE) + 1 < 5) AS `t4` ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 diff --git a/ql/src/test/results/clientpositive/temp_table_alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/temp_table_alter_partition_coltype.q.out index 740a270169..65115f3547 100644 --- a/ql/src/test/results/clientpositive/temp_table_alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/temp_table_alter_partition_coltype.q.out @@ -344,7 +344,7 @@ POSTHOOK: Input: default@alter_coltype_temp@dt=100/ts=6.30 #### A masked pattern was here #### OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` FROM `default`.`alter_coltype_temp` -WHERE `ts` = 3.0 AND `dt` = 100 +WHERE `ts` = 3 AND `dt` = 100 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out b/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out index 2feb6dd738..784f4dc5e6 100644 --- a/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out +++ b/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out @@ -105,10 +105,10 @@ STAGE PLANS: Processor Tree: TableScan alias: src - filterExpr: (to_unix_timestamp(key) > 10L) (type: boolean) + filterExpr: (unix_timestamp(key) > 10L) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (to_unix_timestamp(key) > 10L) (type: boolean) + predicate: (unix_timestamp(key) > 10L) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) diff --git a/ql/src/test/results/clientpositive/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/vector_case_when_2.q.out index a7b46fd8aa..f38ed9eb5b 100644 --- a/ql/src/test/results/clientpositive/vector_case_when_2.q.out +++ b/ql/src/test/results/clientpositive/vector_case_when_2.q.out @@ -998,13 +998,13 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:q548284:int, 1:ROW__ID:struct] Select Operator - expressions: q548284 (type: int), CASE WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END (type: decimal(2,1)) + expressions: q548284 (type: int), CAST( CASE WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (8) END AS decimal(11,1)) (type: decimal(11,1)) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 8] - selectExpressions: IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(2,1)col 7:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, ConstantVectorExpression(val 0.8) -> 3:decimal(2,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(2,1)col 6:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(2,1), ConstantVectorExpression(val 8) -> 6:decimal(2,1)) -> 7:decimal(2,1)) -> 8:decimal(2,1) + projectedOutputColumnNums: [0, 9] + selectExpressions: CastDecimalToDecimal(col 8:decimal(2,1))(children: 
IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(2,1)col 7:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 4) -> 2:boolean, ConstantVectorExpression(val 0.8) -> 3:decimal(2,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(2,1)col 6:decimal(2,1))(children: LongColEqualLongScalar(col 0:int, val 5) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:decimal(2,1), ConstantVectorExpression(val 8) -> 6:decimal(2,1)) -> 7:decimal(2,1)) -> 8:decimal(2,1)) -> 9:decimal(11,1) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -1017,7 +1017,7 @@ STAGE PLANS: nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: decimal(2,1)) + value expressions: _col1 (type: decimal(11,1)) Execution mode: vectorized Map Vectorization: enabled: true @@ -1033,14 +1033,14 @@ STAGE PLANS: includeColumns: [0] dataColumns: q548284:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, decimal(2,1), bigint, decimal(2,1), decimal(2,1), decimal(2,1), decimal(2,1)] + scratchColumnTypeNames: [bigint, decimal(2,1), bigint, decimal(2,1), decimal(2,1), decimal(2,1), decimal(2,1), decimal(11,1)] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(2,1)) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(11,1)) outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/vectorization_10.q.out b/ql/src/test/results/clientpositive/vectorization_10.q.out index 9383bf7130..a4b8687325 100644 --- a/ql/src/test/results/clientpositive/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/vectorization_10.q.out @@ -64,7 +64,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + filterExpr: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -73,24 +73,24 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimal64ColEqualDecimal64Scalar(col 15:decimal(11,4)/DECIMAL_64, val 97632155639)(children: 
CastLongToDecimal64(col 1:smallint) -> 15:decimal(11,4)/DECIMAL_64), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) - predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimal64ColLessEqualDecimal64Scalar(col 14:decimal(6,2)/DECIMAL_64, val -563815)(children: CastLongToDecimal64(col 0:tinyint) -> 14:decimal(6,2)/DECIMAL_64)), FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: (((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or (cstring2 <= '10') or ((cdouble > 6981.0D) and (cstring1 like '%a'))) (type: boolean) + Statistics: Num rows: 11590 Data size: 2232584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] - selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE 
+ projectedOutputColumnNums: [5, 8, 0, 10, 6, 15, 17, 20, 21, 23, 24, 25, 27, 30, 32] + selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleColumn(col 5:double, col 16:double)(children: CastLongToDouble(col 1:smallint) -> 16:double) -> 17:double, DoubleColModuloDoubleScalar(col 19:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 18:double)(children: CastLongToDouble(col 1:smallint) -> 18:double) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 5:double) -> 21:double, DoubleColModuloDoubleColumn(col 22:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 22:double) -> 23:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 24:smallint, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 3:bigint, col 26:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 26:smallint) -> 27:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 29:double)(children: DoubleColAddDoubleColumn(col 5:double, col 28:double)(children: CastLongToDouble(col 1:smallint) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleColUnaryMinus(col 5:double) -> 31:double) -> 32:double + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11590 Data size: 2178896 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -110,7 +110,7 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, decimal(11,4)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double] + scratchColumnTypeNames: [double, decimal(6,2)/DECIMAL_64, double, double, double, double, double, double, double, double, double, bigint, double, bigint, bigint, double, double, double, double, double] Stage: Stage-0 Fetch Operator