diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCondTypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCondTypeCheckProcFactory.java new file mode 100644 index 0000000..b768745 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCondTypeCheckProcFactory.java @@ -0,0 +1,292 @@ +package org.apache.hadoop.hive.ql.parse; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; + +/** + * JoinCondTypeCheckProcFactory is used by Optiq planner(CBO) to generate Join Conditions from Join Condition AST. + * Reasons for sub class: + * 1. Additional restrictions on what is supported in Join Conditions + * 2. Column handling is different + * 3. Join Condn expr has two input RR as opposed to one. + */ + +/** + * TODO:
+ * 1. Could we use a combined RR instead of a list of RRs?
+ * 2. Use Column Processing from TypeCheckProcFactory
+ * 3. Why not use GB expr ? + */ +public class JoinCondTypeCheckProcFactory extends TypeCheckProcFactory { + + public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) + throws SemanticException { + return TypeCheckProcFactory.genExprNode(expr, tcCtx, new JoinCondTypeCheckProcFactory()); + } + + /** + * Processor for table columns. + */ + public static class JoinCondColumnExprProcessor extends ColumnExprProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ASTNode expr = (ASTNode) nd; + ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) : null; + + if (expr.getType() != HiveParser.TOK_TABLE_OR_COL) { + ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr); + return null; + } + + assert (expr.getChildCount() == 1); + String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()); + + boolean qualifiedAccess = (parent != null && parent.getType() == HiveParser.DOT); + + ColumnInfo colInfo = null; + if (!qualifiedAccess) { + colInfo = getColInfo(ctx, null, tableOrCol, expr); + // It's a column. + return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + } else if (hasTableAlias(ctx, tableOrCol, expr)) { + return null; + } else { + throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(expr)); + } + } + + private static boolean hasTableAlias(JoinTypeCheckCtx ctx, String tabName, ASTNode expr) + throws SemanticException { + int tblAliasCnt = 0; + for (RowResolver rr : ctx.getInputRRList()) { + if (rr.hasTableAlias(tabName)) + tblAliasCnt++; + } + + if (tblAliasCnt > 1) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); + } + + return (tblAliasCnt == 1) ? true : false; + } + + private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias, + ASTNode expr) throws SemanticException { + ColumnInfo tmp; + ColumnInfo cInfoToRet = null; + + for (RowResolver rr : ctx.getInputRRList()) { + tmp = rr.get(tabName, colAlias); + if (tmp != null) { + if (cInfoToRet != null) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); + } + cInfoToRet = tmp; + } + } + + return cInfoToRet; + } + } + + /** + * Factory method to get ColumnExprProcessor. + * + * @return ColumnExprProcessor. + */ + @Override + public ColumnExprProcessor getColumnExprProcessor() { + return new JoinCondColumnExprProcessor(); + } + + /** + * The default processor for typechecking. + */ + public static class JoinCondDefaultExprProcessor extends DefaultExprProcessor { + @Override + protected List getReferenceableColumnAliases(TypeCheckCtx ctx) { + JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx; + List possibleColumnNames = new ArrayList(); + for (RowResolver rr : jCtx.getInputRRList()) { + possibleColumnNames.addAll(rr.getReferenceableColumnAliases(null, -1)); + } + + return possibleColumnNames; + } + + @Override + protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, + Object... nodeOutputs) throws SemanticException { + String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) + .getText()); + // NOTE: tableAlias must be a valid non-ambiguous table alias, + // because we've checked that in TOK_TABLE_OR_COL's process method. 
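For intuition, the resolution rules above (hasTableAlias and getColInfo scanning both input RowResolvers) play out as follows on a hypothetical pair of join inputs; the schemas are invented purely for illustration:

    // Hypothetical join inputs: t1(key, v1), t2(key, v2)
    //   ON (v1 = v2)         -> 'v1' and 'v2' each resolve in exactly one input: accepted
    //   ON (key = t2.key)    -> unqualified 'key' exists in both inputs:
    //                           INVALID_JOIN_CONDITION_1 (ambiguous reference)
    //   ON (t1.key = t2.key) -> qualified access resolves each side uniquely: accepted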
+ ColumnInfo colInfo = getColInfo((JoinTypeCheckCtx) ctx, tableAlias, + ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(), expr); + + if (colInfo == null) { + ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); + return null; + } + return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), tableAlias, + colInfo.getIsVirtualCol()); + } + + private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, String colAlias, + ASTNode expr) throws SemanticException { + ColumnInfo tmp; + ColumnInfo cInfoToRet = null; + + for (RowResolver rr : ctx.getInputRRList()) { + tmp = rr.get(tabName, colAlias); + if (tmp != null) { + if (cInfoToRet != null) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); + } + cInfoToRet = tmp; + } + } + + return cInfoToRet; + } + + @Override + protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi, + List children, GenericUDF genericUDF) throws SemanticException { + super.validateUDF(expr, isFunction, ctx, fi, children, genericUDF); + + JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx; + + // Join Condition can not contain disjunctions + if (genericUDF instanceof GenericUDFOPOr) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(expr)); + } + + // Non Conjunctive elements have further limitations in Join conditions + if (!(genericUDF instanceof GenericUDFOPAnd)) { + // Non Comparison UDF other than 'and' can not use inputs from both side + if (!(genericUDF instanceof GenericUDFBaseCompare)) { + if (genericUDFargsRefersToBothInput(genericUDF, children, jCtx.getInputRRList())) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); + } + } else if (genericUDF instanceof GenericUDFBaseCompare) { + // Comparisons of non literals LHS/RHS can not refer to inputs from + // both sides + if (children.size() == 2 && !(children.get(0) instanceof ExprNodeConstantDesc) + && !(children.get(1) instanceof ExprNodeConstantDesc)) { + if (comparisonUDFargsRefersToBothInput((GenericUDFBaseCompare) genericUDF, children, + jCtx.getInputRRList())) { + throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); + } + } + } + } + } + + private static boolean genericUDFargsRefersToBothInput(GenericUDF udf, + List children, List inputRRList) { + boolean argsRefersToBothInput = false; + + Map hasCodeToColDescMap = new HashMap(); + for (ExprNodeDesc child : children) { + ExprNodeDescUtils.getExprNodeColumnDesc(child, hasCodeToColDescMap); + } + Set inputRef = getInputRef(hasCodeToColDescMap.values(), inputRRList); + + if (inputRef.size() > 1) + argsRefersToBothInput = true; + + return argsRefersToBothInput; + } + + private static boolean comparisonUDFargsRefersToBothInput(GenericUDFBaseCompare comparisonUDF, + List children, List inputRRList) { + boolean argsRefersToBothInput = false; + + Map lhsHashCodeToColDescMap = new HashMap(); + Map rhsHashCodeToColDescMap = new HashMap(); + ExprNodeDescUtils.getExprNodeColumnDesc(children.get(0), lhsHashCodeToColDescMap); + ExprNodeDescUtils.getExprNodeColumnDesc(children.get(1), rhsHashCodeToColDescMap); + Set lhsInputRef = getInputRef(lhsHashCodeToColDescMap.values(), inputRRList); + Set rhsInputRef = getInputRef(rhsHashCodeToColDescMap.values(), inputRRList); + + if (lhsInputRef.size() > 1 || rhsInputRef.size() > 1) + argsRefersToBothInput = true; + + return argsRefersToBothInput; + } + + private static Set getInputRef(Collection colDescSet, + List inputRRList) { + 
String tableAlias; + RowResolver inputRR; + Set inputLineage = new HashSet(); + + for (ExprNodeDesc col : colDescSet) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) col; + tableAlias = colDesc.getTabAlias(); + + for (int i = 0; i < inputRRList.size(); i++) { + inputRR = inputRRList.get(i); + + // If table Alias is present check if InputRR has that table and then + // check for internal name + // else if table alias is null then check with internal name in all + // inputRR. + if (tableAlias != null) { + if (inputRR.hasTableAlias(tableAlias)) { + if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) { + inputLineage.add(i); + } + } + } else { + if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) { + inputLineage.add(i); + } + } + } + } + + return inputLineage; + } + } + + /** + * Factory method to get DefaultExprProcessor. + * + * @return DefaultExprProcessor. + */ + @Override + public DefaultExprProcessor getDefaultExprProcessor() { + return new JoinCondDefaultExprProcessor(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCondnTypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCondnTypeCheckProcFactory.java deleted file mode 100644 index a14d392..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCondnTypeCheckProcFactory.java +++ /dev/null @@ -1,1111 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.parse; - -import java.sql.Date; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Stack; - -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; -import org.apache.hadoop.hive.ql.udf.SettableUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; - -/** - * The Factory for creating typecheck processors. The typecheck processors are - * used to processes the syntax trees for Join Condition expressions and convert them into - * expression Node Descriptor trees. They also introduce the correct conversion - * functions to do proper implicit conversion. - */ - -/** - * TODO:
- * 1. Migrate common code into TypeCheckProcFactory
- * 2. Verify whether disallowed expressions in a join condition would even come here or - * would result in an exception during phase-1 analysis
- * 3. Are all of the expressions for "getStrExprProcessor" allowed as part of - * a join condition?
- */ -public final class JoinCondnTypeCheckProcFactory { - - protected static final Log LOG = LogFactory - .getLog(JoinCondnTypeCheckProcFactory.class - .getName()); - - private JoinCondnTypeCheckProcFactory() { - // prevent instantiation - } - - public static Map genExprNode(ASTNode expr, - JoinTypeCheckCtx tcCtx) throws SemanticException { - // Create the walker, the rules dispatcher and the context. - // create a walker which walks the tree in a DFS manner while maintaining - // the operator stack. The dispatcher - // generates the plan from the operator tree - Map opRules = new LinkedHashMap(); - - opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), - getNullExprProcessor()); - opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" - + HiveParser.TinyintLiteral + "%|" + HiveParser.SmallintLiteral + "%|" - + HiveParser.BigintLiteral + "%|" + HiveParser.DecimalLiteral + "%"), - getNumExprProcessor()); - opRules.put(new RuleRegExp("R3", HiveParser.Identifier + "%|" - + HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL - + "%|" + HiveParser.TOK_STRINGLITERALSEQUENCE + "%|" + "%|" - + HiveParser.KW_IF + "%|" + HiveParser.KW_CASE + "%|" - + HiveParser.KW_WHEN + "%|" + HiveParser.KW_IN + "%|" - + HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|" - + HiveParser.KW_STRUCT + "%|" + HiveParser.KW_EXISTS + "%|" - + HiveParser.TOK_SUBQUERY_OP_NOTIN + "%"), getStrExprProcessor()); - opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" - + HiveParser.KW_FALSE + "%"), getBoolExprProcessor()); - opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%"), - getDateExprProcessor()); - opRules.put(new RuleRegExp("R6", HiveParser.TOK_TABLE_OR_COL + "%"), - getColumnExprProcessor()); - opRules.put(new RuleRegExp("R7", HiveParser.TOK_SUBQUERY_OP + "%"), - getSubQueryExprProcessor()); - - // The dispatcher fires the processor corresponding to the closest matching - // rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), - opRules, tcCtx); - GraphWalker ogw = new DefaultGraphWalker(disp); - - // Create a list of topop nodes - ArrayList topNodes = new ArrayList(); - topNodes.add(expr); - HashMap nodeOutputs = new LinkedHashMap(); - ogw.startWalking(topNodes, nodeOutputs); - - return convert(nodeOutputs); - } - - // temporary type-safe casting - private static Map convert(Map outputs) { - Map converted = new LinkedHashMap(); - for (Map.Entry entry : outputs.entrySet()) { - if (entry.getKey() instanceof ASTNode - && (entry.getValue() == null || entry.getValue() instanceof ExprNodeDesc)) { - converted - .put((ASTNode) entry.getKey(), (ExprNodeDesc) entry.getValue()); - } else { - LOG.warn("Invalid type entry " + entry); - } - } - return converted; - } - - /** - * Processor for processing NULL expression. - */ - public static class NullExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - return new ExprNodeNullDesc(); - } - - } - - /** - * Factory method to get NullExprProcessor. - * - * @return NullExprProcessor. - */ - public static NullExprProcessor getNullExprProcessor() { - return new NullExprProcessor(); - } - - /** - * Processor for processing numeric constants. 
- */ - public static class NumExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - Number v = null; - ASTNode expr = (ASTNode) nd; - // The expression can be any one of Double, Long and Integer. We - // try to parse the expression in that order to ensure that the - // most specific type is used for conversion. - try { - if (expr.getText().endsWith("L")) { - // Literal bigint. - v = Long.valueOf(expr.getText().substring(0, - expr.getText().length() - 1)); - } else if (expr.getText().endsWith("S")) { - // Literal smallint. - v = Short.valueOf(expr.getText().substring(0, - expr.getText().length() - 1)); - } else if (expr.getText().endsWith("Y")) { - // Literal tinyint. - v = Byte.valueOf(expr.getText().substring(0, - expr.getText().length() - 1)); - } else if (expr.getText().endsWith("BD")) { - // Literal decimal - String strVal = expr.getText().substring(0, - expr.getText().length() - 2); - HiveDecimal hd = HiveDecimal.create(strVal); - int prec = 1; - int scale = 0; - if (hd != null) { - prec = hd.precision(); - scale = hd.scale(); - } - DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(prec, - scale); - return new ExprNodeConstantDesc(typeInfo, strVal); - } else { - v = Double.valueOf(expr.getText()); - v = Long.valueOf(expr.getText()); - v = Integer.valueOf(expr.getText()); - } - } catch (NumberFormatException e) { - // do nothing here, we will throw an exception in the following block - } - if (v == null) { - throw new SemanticException( - ErrorMsg.INVALID_NUMERICAL_CONSTANT.getMsg(expr)); - } - return new ExprNodeConstantDesc(v); - } - - } - - /** - * Factory method to get NumExprProcessor. - * - * @return NumExprProcessor. - */ - public static NumExprProcessor getNumExprProcessor() { - return new NumExprProcessor(); - } - - /** - * Processor for processing string constants. - */ - public static class StrExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - String str = null; - - switch (expr.getToken().getType()) { - case HiveParser.StringLiteral: - str = BaseSemanticAnalyzer.unescapeSQLString(expr.getText()); - break; - case HiveParser.TOK_STRINGLITERALSEQUENCE: - StringBuilder sb = new StringBuilder(); - for (Node n : expr.getChildren()) { - sb.append(BaseSemanticAnalyzer.unescapeSQLString(((ASTNode) n) - .getText())); - } - str = sb.toString(); - break; - case HiveParser.TOK_CHARSETLITERAL: - str = BaseSemanticAnalyzer.charSetString(expr.getChild(0).getText(), - expr.getChild(1).getText()); - break; - default: - // HiveParser.identifier | HiveParse.KW_IF | HiveParse.KW_LEFT | - // HiveParse.KW_RIGHT - str = BaseSemanticAnalyzer.unescapeIdentifier(expr.getText()); - break; - } - return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str); - } - - } - - /** - * Factory method to get StrExprProcessor. - * - * @return StrExprProcessor. - */ - public static StrExprProcessor getStrExprProcessor() { - return new StrExprProcessor(); - } - - /** - * Processor for boolean constants. 
- */ - public static class BoolExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - Boolean bool = null; - - switch (expr.getToken().getType()) { - case HiveParser.KW_TRUE: - bool = Boolean.TRUE; - break; - case HiveParser.KW_FALSE: - bool = Boolean.FALSE; - break; - default: - assert false; - } - return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, bool); - } - - } - - /** - * Factory method to get BoolExprProcessor. - * - * @return BoolExprProcessor. - */ - public static BoolExprProcessor getBoolExprProcessor() { - return new BoolExprProcessor(); - } - - /** - * Processor for date constants. - */ - public static class DateExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - - // Get the string value and convert to a Date value. - try { - String dateString = BaseSemanticAnalyzer.stripQuotes(expr.getText()); - Date date = Date.valueOf(dateString); - return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, date); - } catch (IllegalArgumentException err) { - throw new SemanticException( - "Unable to convert date literal string to date value.", err); - } - } - } - - /** - * Factory method to get DateExprProcessor. - * - * @return DateExprProcessor. - */ - public static DateExprProcessor getDateExprProcessor() { - return new DateExprProcessor(); - } - - /** - * Processor for table columns. - */ - public static class ColumnExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) - : null; - - if (expr.getType() != HiveParser.TOK_TABLE_OR_COL) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr); - return null; - } - - assert (expr.getChildCount() == 1); - String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr - .getChild(0).getText()); - - boolean qualifiedAccess = (parent != null && parent.getType() == HiveParser.DOT); - - ColumnInfo colInfo = null; - if (!qualifiedAccess) { - colInfo = getColInfo(ctx, null, tableOrCol, expr); - // It's a column. - return new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), - colInfo.getIsVirtualCol()); - } else if (hasTableAlias(ctx, tableOrCol, expr)) { - return null; - } else { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr); - return null; - } - } - } - - /** - * Factory method to get ColumnExprProcessor. - * - * @return ColumnExprProcessor. - */ - public static ColumnExprProcessor getColumnExprProcessor() { - return new ColumnExprProcessor(); - } - - /** - * The default processor for typechecking. 
- */ - public static class DefaultExprProcessor implements NodeProcessor { - - static HashMap specialUnaryOperatorTextHashMap; - static HashMap specialFunctionTextHashMap; - static HashMap conversionFunctionTextHashMap; - static HashSet windowingTokens; - static { - specialUnaryOperatorTextHashMap = new HashMap(); - specialUnaryOperatorTextHashMap.put(HiveParser.PLUS, "positive"); - specialUnaryOperatorTextHashMap.put(HiveParser.MINUS, "negative"); - specialFunctionTextHashMap = new HashMap(); - specialFunctionTextHashMap.put(HiveParser.TOK_ISNULL, "isnull"); - specialFunctionTextHashMap.put(HiveParser.TOK_ISNOTNULL, "isnotnull"); - conversionFunctionTextHashMap = new HashMap(); - conversionFunctionTextHashMap.put(HiveParser.TOK_BOOLEAN, - serdeConstants.BOOLEAN_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_TINYINT, - serdeConstants.TINYINT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_SMALLINT, - serdeConstants.SMALLINT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_INT, - serdeConstants.INT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_BIGINT, - serdeConstants.BIGINT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_FLOAT, - serdeConstants.FLOAT_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_DOUBLE, - serdeConstants.DOUBLE_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_STRING, - serdeConstants.STRING_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_CHAR, - serdeConstants.CHAR_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR, - serdeConstants.VARCHAR_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY, - serdeConstants.BINARY_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_DATE, - serdeConstants.DATE_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_TIMESTAMP, - serdeConstants.TIMESTAMP_TYPE_NAME); - conversionFunctionTextHashMap.put(HiveParser.TOK_DECIMAL, - serdeConstants.DECIMAL_TYPE_NAME); - - windowingTokens = new HashSet(); - windowingTokens.add(HiveParser.KW_OVER); - windowingTokens.add(HiveParser.TOK_PARTITIONINGSPEC); - windowingTokens.add(HiveParser.TOK_DISTRIBUTEBY); - windowingTokens.add(HiveParser.TOK_SORTBY); - windowingTokens.add(HiveParser.TOK_CLUSTERBY); - windowingTokens.add(HiveParser.TOK_WINDOWSPEC); - windowingTokens.add(HiveParser.TOK_WINDOWRANGE); - windowingTokens.add(HiveParser.TOK_WINDOWVALUES); - windowingTokens.add(HiveParser.KW_UNBOUNDED); - windowingTokens.add(HiveParser.KW_PRECEDING); - windowingTokens.add(HiveParser.KW_FOLLOWING); - windowingTokens.add(HiveParser.KW_CURRENT); - windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEASC); - windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEDESC); - } - - private static boolean isRedundantConversionFunction(ASTNode expr, - boolean isFunction, ArrayList children) { - if (!isFunction) { - return false; - } - // conversion functions take a single parameter - if (children.size() != 1) { - return false; - } - String funcText = conversionFunctionTextHashMap.get(((ASTNode) expr - .getChild(0)).getType()); - // not a conversion function - if (funcText == null) { - return false; - } - // return true when the child type and the conversion target type is the - // same - return ((PrimitiveTypeInfo) children.get(0).getTypeInfo()).getTypeName() - .equalsIgnoreCase(funcText); - } - - public static String getFunctionText(ASTNode expr, boolean isFunction) { - String funcText = null; - if (!isFunction) { - // For operator, the function name is 
the operator text, unless it's in - // our special dictionary - if (expr.getChildCount() == 1) { - funcText = specialUnaryOperatorTextHashMap.get(expr.getType()); - } - if (funcText == null) { - funcText = expr.getText(); - } - } else { - // For TOK_FUNCTION, the function name is stored in the first child, - // unless it's in our - // special dictionary. - assert (expr.getChildCount() >= 1); - int funcType = ((ASTNode) expr.getChild(0)).getType(); - funcText = specialFunctionTextHashMap.get(funcType); - if (funcText == null) { - funcText = conversionFunctionTextHashMap.get(funcType); - } - if (funcText == null) { - funcText = ((ASTNode) expr.getChild(0)).getText(); - } - } - return BaseSemanticAnalyzer.unescapeIdentifier(funcText); - } - - /** - * This function create an ExprNodeDesc for a UDF function given the - * children (arguments). It will insert implicit type conversion functions - * if necessary. - * - * @throws UDFArgumentException - */ - static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, - TypeInfo typeInfo, ExprNodeDesc... children) - throws UDFArgumentException { - - FunctionInfo fi = FunctionRegistry.getFunctionInfo(udfName); - if (fi == null) { - throw new UDFArgumentException(udfName + " not found."); - } - - GenericUDF genericUDF = fi.getGenericUDF(); - if (genericUDF == null) { - throw new UDFArgumentException(udfName - + " is an aggregation function or a table function."); - } - - // Add udfData to UDF if necessary - if (typeInfo != null) { - if (genericUDF instanceof SettableUDF) { - ((SettableUDF) genericUDF).setTypeInfo(typeInfo); - } - } - - List childrenList = new ArrayList( - children.length); - childrenList.addAll(Arrays.asList(children)); - return ExprNodeGenericFuncDesc.newInstance(genericUDF, childrenList); - } - - public static ExprNodeDesc getFuncExprNodeDesc(String udfName, - ExprNodeDesc... children) throws UDFArgumentException { - return getFuncExprNodeDescWithUdfData(udfName, null, children); - } - - static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, - boolean isFunction, ArrayList children, - JoinTypeCheckCtx ctx) throws SemanticException, UDFArgumentException { - // return the child directly if the conversion is redundant. - if (isRedundantConversionFunction(expr, isFunction, children)) { - assert (children.size() == 1); - assert (children.get(0) != null); - return children.get(0); - } - String funcText = getFunctionText(expr, isFunction); - ExprNodeDesc desc; - if (funcText.equals(".")) { - throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); - } else if (funcText.equals("[")) { - // "[]" : LSQUARE/INDEX Expression - throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); - } else { - // other operators or functions - FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcText); - - if (fi == null) { - if (isFunction) { - throw new SemanticException( - ErrorMsg.INVALID_FUNCTION.getMsg((ASTNode) expr.getChild(0))); - } else { - throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); - } - } - - // getGenericUDF() actually clones the UDF. Just call it once and reuse. 
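As an aside on the helper defined above, a minimal usage sketch is shown below; the column descriptors are invented for illustration, while getFuncExprNodeDesc is the helper as declared in this file:

    // Sketch: building t1.key = t2.key programmatically. getFuncExprNodeDesc
    // resolves "=" through the FunctionRegistry and inserts implicit type
    // conversions between the arguments if needed.
    ExprNodeDesc lhs = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", "t1", false);
    ExprNodeDesc rhs = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", "t2", false);
    ExprNodeDesc eq = getFuncExprNodeDesc("=", lhs, rhs);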
- GenericUDF genericUDF = fi.getGenericUDF(); - - if (genericUDF instanceof GenericUDFOPOr) { - throw new SemanticException( - ErrorMsg.INVALID_FUNCTION.getMsg((ASTNode) expr.getChild(0))); - } - - // Handle type casts that may contain type parameters - if (isFunction) { - ASTNode funcNameNode = (ASTNode) expr.getChild(0); - switch (funcNameNode.getType()) { - case HiveParser.TOK_CHAR: - // Add type params - CharTypeInfo charTypeInfo = ParseUtils - .getCharTypeInfo(funcNameNode); - if (genericUDF != null) { - ((SettableUDF) genericUDF).setTypeInfo(charTypeInfo); - } - break; - case HiveParser.TOK_VARCHAR: - VarcharTypeInfo varcharTypeInfo = ParseUtils - .getVarcharTypeInfo(funcNameNode); - if (genericUDF != null) { - ((SettableUDF) genericUDF).setTypeInfo(varcharTypeInfo); - } - break; - case HiveParser.TOK_DECIMAL: - DecimalTypeInfo decTypeInfo = ParseUtils - .getDecimalTypeTypeInfo(funcNameNode); - if (genericUDF != null) { - ((SettableUDF) genericUDF).setTypeInfo(decTypeInfo); - } - break; - default: - // Do nothing - break; - } - } - - // Join Condition can not contain UDTF - if (fi.getGenericUDTF() != null) { - throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); - } - - // UDAF in filter condition, group-by caluse, param of funtion, etc. - if (fi.isGenericUDAF()) { - if (isFunction) { - throw new SemanticException( - ErrorMsg.UDAF_INVALID_LOCATION.getMsg((ASTNode) expr - .getChild(0))); - } else { - throw new SemanticException( - ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr)); - } - } - - if (genericUDF != null) { - if (FunctionRegistry.isStateful(genericUDF)) { - throw new SemanticException( - ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); - } - } - - if (!(genericUDF instanceof GenericUDFOPAnd)) { - if (!(genericUDF instanceof GenericUDFBaseCompare)) { - if (genericUDFargsRefersToBothInput(genericUDF, children, - ctx.getInputRRList())) { - ctx.setError(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr), expr); - } - } else if (genericUDF instanceof GenericUDFBaseCompare) { - if (children.size() == 2 - && !(children.get(0) instanceof ExprNodeConstantDesc) - && !(children.get(1) instanceof ExprNodeConstantDesc)) { - if (comparisonUDFargsRefersToBothInput( - (GenericUDFBaseCompare) genericUDF, children, - ctx.getInputRRList())) { - ctx.setError(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr), - expr); - return null; - } - - if (argsRefersToNeither((GenericUDFBaseCompare) genericUDF, - children, ctx.getInputRRList())) { - ctx.setError(ErrorMsg.INVALID_JOIN_CONDITION_2.getMsg(expr), - expr); - return null; - } - - if (!(genericUDF instanceof GenericUDFOPEqual)) { - ctx.setError(ErrorMsg.INVALID_FUNCTION.getMsg(expr), expr); - return null; - } - } else if (children.size() == 2 - && ((children.get(0) instanceof ExprNodeConstantDesc && children - .get(1) instanceof ExprNodeColumnDesc) || (children.get(0) instanceof ExprNodeColumnDesc && children - .get(1) instanceof ExprNodeConstantDesc))) { - int constIdx = children.get(0) instanceof ExprNodeConstantDesc ? 
0 - : 1; - - Set inferTypes = new HashSet(Arrays.asList( - serdeConstants.TINYINT_TYPE_NAME.toLowerCase(), - serdeConstants.SMALLINT_TYPE_NAME.toLowerCase(), - serdeConstants.INT_TYPE_NAME.toLowerCase(), - serdeConstants.BIGINT_TYPE_NAME.toLowerCase(), - serdeConstants.FLOAT_TYPE_NAME.toLowerCase(), - serdeConstants.DOUBLE_TYPE_NAME.toLowerCase(), - serdeConstants.STRING_TYPE_NAME.toLowerCase())); - - String constType = children.get(constIdx).getTypeString() - .toLowerCase(); - String columnType = children.get(1 - constIdx).getTypeString() - .toLowerCase(); - - if (inferTypes.contains(constType) - && inferTypes.contains(columnType) - && !columnType.equalsIgnoreCase(constType)) { - Object originalValue = ((ExprNodeConstantDesc) children - .get(constIdx)).getValue(); - String constValue = originalValue.toString(); - boolean triedDouble = false; - Number value = null; - try { - if (columnType - .equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)) { - value = new Byte(constValue); - } else if (columnType - .equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)) { - value = new Short(constValue); - } else if (columnType - .equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) { - value = new Integer(constValue); - } else if (columnType - .equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { - value = new Long(constValue); - } else if (columnType - .equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { - value = new Float(constValue); - } else if (columnType - .equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { - triedDouble = true; - value = new Double(constValue); - } else if (columnType - .equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { - // Don't scramble the const type information if comparing to - // a - // string column, - // It's not useful to do so; as of now, there is also a hack - // in - // SemanticAnalyzer#genTablePlan that causes every column to - // look like a string - // a string down here, so number type information is always - // lost - // otherwise. - boolean isNumber = (originalValue instanceof Number); - triedDouble = !isNumber; - value = isNumber ? (Number) originalValue : new Double( - constValue); - } - } catch (NumberFormatException nfe) { - // this exception suggests the precise type inference did not - // succeed - // we'll try again to convert it to double - // however, if we already tried this, or the column is NUMBER - // type - // and - // the operator is EQUAL, return false due to the type - // mismatch - if (triedDouble - || (genericUDF instanceof GenericUDFOPEqual && !columnType - .equals(serdeConstants.STRING_TYPE_NAME))) { - return new ExprNodeConstantDesc(false); - } - - try { - value = new Double(constValue); - } catch (NumberFormatException ex) { - return new ExprNodeConstantDesc(false); - } - } - - if (value != null) { - children.set(constIdx, new ExprNodeConstantDesc(value)); - } - } - } - } - } - - desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, - children); - } - // UDFOPPositive is a no-op. - // However, we still create it, and then remove it here, to make sure we - // only allow - // "+" for numeric types. 
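The constant re-typing logic above can be summarized with a hypothetical example (values invented):

    // Illustration of the constant re-typing performed above:
    //   int_col = '5'   -> both types are in inferTypes and differ, so the
    //                      constant is re-parsed as the column's type and
    //                      children[constIdx] becomes ExprNodeConstantDesc(5)
    //   int_col = 'abc' -> NumberFormatException; the double retry also fails,
    //                      so for an equality the whole comparison folds to
    //                      ExprNodeConstantDesc(false)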
- if (FunctionRegistry.isOpPositive(desc)) { - assert (desc.getChildren().size() == 1); - desc = desc.getChildren().get(0); - } - assert (desc != null); - return desc; - } - - private static boolean genericUDFargsRefersToBothInput(GenericUDF udf, - ArrayList children, List inputRRList) { - boolean argsRefersToBothInput = false; - - Map hasCodeToColDescMap = new HashMap(); - for (ExprNodeDesc child : children) { - ExprNodeDescUtils.getExprNodeColumnDesc(child, hasCodeToColDescMap); - } - Set inputRef = getInputRef(hasCodeToColDescMap.values(), inputRRList); - - if (inputRef.size() > 1) - argsRefersToBothInput = true; - - return argsRefersToBothInput; - } - - private static boolean comparisonUDFargsRefersToBothInput( - GenericUDFBaseCompare comparisonUDF, ArrayList children, - List inputRRList) { - boolean argsRefersToBothInput = false; - - Map lhsHashCodeToColDescMap = new HashMap(); - Map rhsHashCodeToColDescMap = new HashMap(); - ExprNodeDescUtils.getExprNodeColumnDesc(children.get(0), lhsHashCodeToColDescMap); - ExprNodeDescUtils.getExprNodeColumnDesc(children.get(1), rhsHashCodeToColDescMap); - Set lhsInputRef = getInputRef(lhsHashCodeToColDescMap.values(), inputRRList); - Set rhsInputRef = getInputRef(rhsHashCodeToColDescMap.values(), inputRRList); - - if (lhsInputRef.size() > 1 || rhsInputRef.size() > 1) - argsRefersToBothInput = true; - - return argsRefersToBothInput; - } - - private static boolean argsRefersToNeither( - GenericUDFBaseCompare comparisonUDF, ArrayList children, - List inputRRList) { - boolean argsRefersToNeither = false; - - Map lhsHashCodeToColDescMap = new HashMap(); - Map rhsHashCodeToColDescMap = new HashMap(); - ExprNodeDescUtils.getExprNodeColumnDesc(children.get(0), lhsHashCodeToColDescMap); - ExprNodeDescUtils.getExprNodeColumnDesc(children.get(1), rhsHashCodeToColDescMap); - Set lhsInputRef = getInputRef(lhsHashCodeToColDescMap.values(), inputRRList); - Set rhsInputRef = getInputRef(rhsHashCodeToColDescMap.values(), inputRRList); - - if (lhsInputRef.size() == 0 && rhsInputRef.size() == 0) - argsRefersToNeither = true; - - return argsRefersToNeither; - } - - private static Set getInputRef(Collection colDescSet, - List inputRRList) { - String tableAlias; - RowResolver inputRR; - Set inputLineage = new HashSet(); - - for (ExprNodeDesc col : colDescSet) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) col; - tableAlias = colDesc.getTabAlias(); - - for (int i = 0; i < inputRRList.size(); i++) { - inputRR = inputRRList.get(i); - - // If table Alias is present check if InputRR has that table and then check for internal name - // else if table alias is null then check with internal name in all inputRR. - if (tableAlias != null) { - if (inputRR.hasTableAlias(tableAlias)) { - if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) { - inputLineage.add(i); - } - } - } else { - if (inputRR.getInvRslvMap().containsKey(colDesc.getColumn())) { - inputLineage.add(i); - } - } - } - } - - return inputLineage; - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - - /* - * Windowing is not supported in Join Condition - */ - if (windowingTokens.contains(expr.getType())) { - ctx.setError(ErrorMsg.INVALID_FUNCTION.getMsg(expr, - "Windowing is not supported in Join Condition"), expr); - - return null; - } - - if (expr.getType() == HiveParser.TOK_TABNAME) { - return null; - } - - if (expr.getType() == HiveParser.TOK_ALLCOLREF) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr, - "Join Condition does not support * syntax"), expr); - - return null; - } - - // If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL, - // and the operator is a DOT, then it's a table column reference. - if (expr.getType() == HiveParser.DOT - && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL - && nodeOutputs[0] == null) { - - String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr - .getChild(0).getChild(0).getText()); - // NOTE: tableAlias must be a valid non-ambiguous table alias, - // because we've checked that in TOK_TABLE_OR_COL's process method. - ColumnInfo colInfo = getColInfo(ctx, tableAlias, - ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(), expr); - - if (colInfo == null) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); - return null; - } - return new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), tableAlias, colInfo.getIsVirtualCol()); - } - - // Return nulls for conversion operators - if (conversionFunctionTextHashMap.keySet().contains(expr.getType()) - || specialFunctionTextHashMap.keySet().contains(expr.getType()) - || expr.getToken().getType() == HiveParser.CharSetName - || expr.getToken().getType() == HiveParser.CharSetLiteral) { - return null; - } - - boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION - || expr.getType() == HiveParser.TOK_FUNCTIONSTAR || expr.getType() == HiveParser.TOK_FUNCTIONDI); - - // Create all children - int childrenBegin = (isFunction ? 
1 : 0); - ArrayList children = new ArrayList( - expr.getChildCount() - childrenBegin); - for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) { - if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) { - children.addAll(((ExprNodeColumnListDesc) nodeOutputs[ci]) - .getChildren()); - } else { - children.add((ExprNodeDesc) nodeOutputs[ci]); - } - } - - if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); - return null; - } - - // If any of the children contains null, then return a null - // this is a hack for now to handle the group by case - if (children.contains(null)) { - List possibleColumnNames = getReferenceableColumnAliases(ctx); - String reason = String.format("(possible column names are: %s)", - StringUtils.join(possibleColumnNames, ", ")); - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0), reason), - expr); - return null; - } - - // Create function desc - try { - return getXpathOrFuncExprNodeDesc(expr, isFunction, children, ctx); - } catch (UDFArgumentTypeException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_TYPE.getMsg( - expr.getChild(childrenBegin + e.getArgumentId()), e.getMessage())); - } catch (UDFArgumentLengthException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_LENGTH.getMsg( - expr, e.getMessage())); - } catch (UDFArgumentException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT.getMsg(expr, - e.getMessage())); - } - } - - } - - /** - * Factory method to get DefaultExprProcessor. - * - * @return DefaultExprProcessor. - */ - public static DefaultExprProcessor getDefaultExprProcessor() { - return new DefaultExprProcessor(); - } - - /** - * Processor for subquery expressions.. - */ - public static class SubQueryExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - JoinTypeCheckCtx ctx = (JoinTypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - ASTNode sqNode = (ASTNode) expr.getParent().getChild(1); - /* - * Restriction.1.h :: SubQueries not supported in Join Condition. - */ - ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode, - "SubQuery expressions are npt supported in Join Condition"), sqNode); - return null; - } - } - - /** - * Factory method to get SubQueryExprProcessor. - * - * @return DateExprProcessor. - */ - public static SubQueryExprProcessor getSubQueryExprProcessor() { - return new SubQueryExprProcessor(); - } - - private static boolean hasTableAlias(JoinTypeCheckCtx ctx, String tabName, - ASTNode expr) throws SemanticException { - int tblAliasCnt = 0; - for (RowResolver rr : ctx.getInputRRList()) { - if (rr.hasTableAlias(tabName)) - tblAliasCnt++; - } - - if (tblAliasCnt > 1) { - throw new SemanticException( - ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); - } - - return (tblAliasCnt == 1) ? 
true : false; - } - - private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, - String colAlias, ASTNode expr) throws SemanticException { - ColumnInfo tmp; - ColumnInfo cInfoToRet = null; - - for (RowResolver rr : ctx.getInputRRList()) { - tmp = rr.get(tabName, colAlias); - if (tmp != null) { - if (cInfoToRet != null) { - throw new SemanticException( - ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr)); - } - cInfoToRet = tmp; - } - } - - return cInfoToRet; - } - - private static List getReferenceableColumnAliases(JoinTypeCheckCtx ctx) { - List possibleColumnNames = new ArrayList(); - for (RowResolver rr : ctx.getInputRRList()) { - possibleColumnNames.addAll(rr.getReferenceableColumnAliases(null, -1)); - } - - return possibleColumnNames; - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinTypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinTypeCheckCtx.java index eb4e87c..20699f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinTypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinTypeCheckCtx.java @@ -1,54 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hive.ql.parse; -import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; - import com.google.common.collect.ImmutableList; -public class JoinTypeCheckCtx implements NodeProcessorCtx { - /** - * Potential typecheck error reason. - */ - private String error; +/** + * JoinTypeCheckCtx is used by Optiq planner(CBO) to generate Join Conditions from Join Condition AST. + * Reasons for sub class: + * 1. Join Conditions can not handle: + * a. Stateful Functions + * b. Distinct + * c. '*' expr + * d. '.*' expr + * e. Windowing expr + * f. Complex type member access + * g. Array Index Access + * h. Sub query + * i. GB expr elimination + * 2. Join Condn expr has two input RR as opposed to one. + */ - /** - * The node that generated the potential typecheck error - */ - private ASTNode errorSrcNode; - - private final ImmutableList m_inputRRLst; +/** + * TODO:
+ * 1. Could we use a combined RR instead of a list of RRs?
+ * 2. Why not use GB expr ? + */ +public class JoinTypeCheckCtx extends TypeCheckCtx { + private final ImmutableList inputRRLst; + private final boolean outerJoin; - public JoinTypeCheckCtx(RowResolver... inputRRLst) { - m_inputRRLst = new ImmutableList.Builder().addAll(Arrays.asList(inputRRLst)).build(); + public JoinTypeCheckCtx(RowResolver leftRR, RowResolver rightRR, JoinType hiveJoinType) + throws SemanticException { + super(RowResolver.getCombinedRR(leftRR, rightRR), false, false, false, false, false, false, + false, false, false); + this.inputRRLst = ImmutableList.of(leftRR, rightRR); + this.outerJoin = (hiveJoinType == JoinType.LEFTOUTER) || (hiveJoinType == JoinType.RIGHTOUTER) + || (hiveJoinType == JoinType.FULLOUTER); } /** * @return the inputRR List */ public List getInputRRList() { - return m_inputRRLst; + return inputRRLst; } - /** - * @param error - * the error to set - * - */ - public void setError(String error, ASTNode errorSrcNode) { - this.error = error; - this.errorSrcNode = errorSrcNode; - } - - /** - * @return the error - */ - public String getError() { - return error; + public boolean isOuterJoin() { + return outerJoin; } - - public ASTNode getErrorSrcNode() { - return errorSrcNode; - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 216c69b..bf284dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12166,8 +12166,12 @@ private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJ // 2. Construct ExpressionNodeDesc representing Join Condition RexNode optiqJoinCond = null; if (joinCond != null) { - Map exprNodes = JoinCondnTypeCheckProcFactory.genExprNode(joinCond, - new JoinTypeCheckCtx(leftRR, rightRR)); + JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType); + Map exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond, + jCtx); + if (jCtx.getError() != null) + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(), + jCtx.getError())); ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java index a95ae20..11bd103 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java @@ -54,6 +54,22 @@ private boolean allowDistinctFunctions; + private final boolean allowGBExprElimination; + + private final boolean allowAllColRef; + + private final boolean allowFunctionStar; + + private final boolean allowWindowing; + + // "." : FIELD Expression + private final boolean allowFieldExpr; + + // "[]" : LSQUARE/INDEX Expression + private final boolean allowIndexExpr; + + private final boolean allowSubQueryExpr; + /** * Constructor. * @@ -61,10 +77,24 @@ * The input row resolver of the previous operator. 
*/ public TypeCheckCtx(RowResolver inputRR) { + this(inputRR, false, true, true, true, true, true, true, true, true); + } + + public TypeCheckCtx(RowResolver inputRR, boolean allowStatefulFunctions, + boolean allowDistinctFunctions, boolean allowGBExprElimination, boolean allowAllColRef, + boolean allowFunctionStar, boolean allowWindowing, boolean allowFieldExpr, + boolean allowIndexExpr, boolean allowSubQueryExpr) { setInputRR(inputRR); error = null; - allowStatefulFunctions = false; - allowDistinctFunctions = true; + this.allowStatefulFunctions = allowStatefulFunctions; + this.allowDistinctFunctions = allowDistinctFunctions; + this.allowGBExprElimination = allowGBExprElimination; + this.allowAllColRef = allowAllColRef; + this.allowFunctionStar = allowFunctionStar; + this.allowWindowing = allowWindowing; + this.allowFieldExpr = allowFieldExpr; + this.allowIndexExpr = allowIndexExpr; + this.allowSubQueryExpr = allowSubQueryExpr; } /** @@ -98,7 +128,8 @@ public UnparseTranslator getUnparseTranslator() { } /** - * @param allowStatefulFunctions whether to allow stateful UDF invocations + * @param allowStatefulFunctions + * whether to allow stateful UDF invocations */ public void setAllowStatefulFunctions(boolean allowStatefulFunctions) { this.allowStatefulFunctions = allowStatefulFunctions; @@ -114,7 +145,7 @@ public boolean getAllowStatefulFunctions() { /** * @param error * the error to set - * + * */ public void setError(String error, ASTNode errorSrcNode) { this.error = error; @@ -136,7 +167,35 @@ public void setAllowDistinctFunctions(boolean allowDistinctFunctions) { this.allowDistinctFunctions = allowDistinctFunctions; } - public boolean isAllowDistinctFunctions() { + public boolean getAllowDistinctFunctions() { return allowDistinctFunctions; } + + public boolean getAllowGBExprElimination() { + return allowGBExprElimination; + } + + public boolean getallowAllColRef() { + return allowAllColRef; + } + + public boolean getallowFunctionStar() { + return allowFunctionStar; + } + + public boolean getallowWindowing() { + return allowWindowing; + } + + public boolean getallowFieldExpr() { + return allowFieldExpr; + } + + public boolean getallowIndexExpr() { + return allowIndexExpr; + } + + public boolean getallowSubQueryExpr() { + return allowSubQueryExpr; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index e44f5ae..1e60f7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -80,12 +80,12 @@ * expression Node Descriptor trees. They also introduce the correct conversion * functions to do proper implicit conversion. 
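One readability note on the TypeCheckCtx change above: the widened constructor takes nine positional booleans after the RowResolver, which is easy to misread at call sites. A sketch of the delegating call with each flag labeled (argument order as declared in the hunk; the values are the defaults used by the one-argument constructor):

    new TypeCheckCtx(inputRR,
        false,  // allowStatefulFunctions
        true,   // allowDistinctFunctions
        true,   // allowGBExprElimination
        true,   // allowAllColRef
        true,   // allowFunctionStar
        true,   // allowWindowing
        true,   // allowFieldExpr
        true,   // allowIndexExpr
        true);  // allowSubQueryExpr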
*/ -public final class TypeCheckProcFactory { +public class TypeCheckProcFactory { protected static final Log LOG = LogFactory.getLog(TypeCheckProcFactory.class .getName()); - private TypeCheckProcFactory() { + protected TypeCheckProcFactory() { // prevent instantiation } @@ -118,7 +118,7 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) RowResolver input = ctx.getInputRR(); ExprNodeDesc desc = null; - if ((ctx == null) || (input == null)) { + if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) { return null; } @@ -137,8 +137,13 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) return desc; } - public static Map genExprNode(ASTNode expr, - TypeCheckCtx tcCtx) throws SemanticException { + public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) + throws SemanticException { + return genExprNode(expr, tcCtx, new TypeCheckProcFactory()); + } + + protected static Map genExprNode(ASTNode expr, + TypeCheckCtx tcCtx, TypeCheckProcFactory tf) throws SemanticException { // Create the walker, the rules dispatcher and the context. // create a walker which walks the tree in a DFS manner while maintaining // the operator stack. The dispatcher @@ -146,13 +151,13 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), - getNullExprProcessor()); + tf.getNullExprProcessor()); opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" + HiveParser.TinyintLiteral + "%|" + HiveParser.SmallintLiteral + "%|" + HiveParser.BigintLiteral + "%|" + HiveParser.DecimalLiteral + "%"), - getNumExprProcessor()); + tf.getNumExprProcessor()); opRules .put(new RuleRegExp("R3", HiveParser.Identifier + "%|" + HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|" @@ -162,18 +167,18 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) + HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|" + HiveParser.KW_STRUCT + "%|" + HiveParser.KW_EXISTS + "%|" + HiveParser.TOK_SUBQUERY_OP_NOTIN + "%"), - getStrExprProcessor()); + tf.getStrExprProcessor()); opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" - + HiveParser.KW_FALSE + "%"), getBoolExprProcessor()); - opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%"), getDateExprProcessor()); + + HiveParser.KW_FALSE + "%"), tf.getBoolExprProcessor()); + opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%"), tf.getDateExprProcessor()); opRules.put(new RuleRegExp("R6", HiveParser.TOK_TABLE_OR_COL + "%"), - getColumnExprProcessor()); + tf.getColumnExprProcessor()); opRules.put(new RuleRegExp("R7", HiveParser.TOK_SUBQUERY_OP + "%"), - getSubQueryExprProcessor()); + tf.getSubQueryExprProcessor()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), + Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -229,7 +234,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, * * @return NullExprProcessor. */ - public static NullExprProcessor getNullExprProcessor() { + public NullExprProcessor getNullExprProcessor() { return new NullExprProcessor(); } @@ -304,7 +309,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, * * @return NumExprProcessor. 
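The effect of de-finalizing TypeCheckProcFactory and turning the processor getters into instance methods is a template-method hook: a subclass overrides only the processors it needs to specialize, and the shared walker built in genExprNode picks them up. A minimal sketch of the pattern (MyFactory and MyColumnProcessor are hypothetical names; JoinCondTypeCheckProcFactory is the real subclass this patch adds):

    public class MyFactory extends TypeCheckProcFactory {
      @Override
      public ColumnExprProcessor getColumnExprProcessor() {
        return new MyColumnProcessor(); // hypothetical specialized processor
      }

      public static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr, TypeCheckCtx ctx)
          throws SemanticException {
        // Route the shared rule dispatcher through this factory's processors.
        return TypeCheckProcFactory.genExprNode(expr, ctx, new MyFactory());
      }
    }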
   */
-  public static NumExprProcessor getNumExprProcessor() {
+  public NumExprProcessor getNumExprProcessor() {
     return new NumExprProcessor();
   }
 
@@ -362,7 +367,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    *
    * @return StrExprProcessor.
    */
-  public static StrExprProcessor getStrExprProcessor() {
+  public StrExprProcessor getStrExprProcessor() {
     return new StrExprProcessor();
   }
 
@@ -408,7 +413,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    *
    * @return BoolExprProcessor.
    */
-  public static BoolExprProcessor getBoolExprProcessor() {
+  public BoolExprProcessor getBoolExprProcessor() {
     return new BoolExprProcessor();
   }
 
@@ -449,7 +454,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    *
    * @return DateExprProcessor.
    */
-  public static DateExprProcessor getDateExprProcessor() {
+  public DateExprProcessor getDateExprProcessor() {
     return new DateExprProcessor();
   }
 
@@ -546,7 +551,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    *
    * @return ColumnExprProcessor.
    */
-  public static ColumnExprProcessor getColumnExprProcessor() {
+  public ColumnExprProcessor getColumnExprProcessor() {
     return new ColumnExprProcessor();
   }
 
@@ -613,7 +618,7 @@ public static ColumnExprProcessor getColumnExprProcessor() {
       windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEDESC);
     }
 
-    private static boolean isRedundantConversionFunction(ASTNode expr,
+    protected static boolean isRedundantConversionFunction(ASTNode expr,
         boolean isFunction, ArrayList<ExprNodeDesc> children) {
       if (!isFunction) {
         return false;
@@ -700,7 +705,30 @@ public static ExprNodeDesc getFuncExprNodeDesc(String udfName,
       return getFuncExprNodeDescWithUdfData(udfName, null, children);
     }
 
-    static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
+    protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi,
+        List<ExprNodeDesc> children, GenericUDF genericUDF) throws SemanticException {
+      // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't
+      // supported
+      if (fi.getGenericUDTF() != null) {
+        throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg());
+      }
+      // UDAF in filter condition, group-by clause, param of function, etc.
+      if (fi.getGenericUDAFResolver() != null) {
+        if (isFunction) {
+          throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg((ASTNode) expr
+              .getChild(0)));
+        } else {
+          throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr));
+        }
+      }
+      if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) {
+        if (FunctionRegistry.isStateful(genericUDF)) {
+          throw new SemanticException(ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg());
+        }
+      }
+    }
+
+    protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
         boolean isFunction, ArrayList<ExprNodeDesc> children, TypeCheckCtx ctx)
         throws SemanticException, UDFArgumentException {
       // return the child directly if the conversion is redundant.
@@ -713,6 +741,9 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
       ExprNodeDesc desc;
       if (funcText.equals(".")) {
         // "." : FIELD Expression
+        if (!ctx.getAllowFieldExpr())
+          throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr));
+
         assert (children.size() == 2);
         // Only allow constant field name for now
         assert (children.get(1) instanceof ExprNodeConstantDesc);
@@ -727,23 +758,22 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
         // Allow accessing a field of list element structs directly from a list
         boolean isList = (object.getTypeInfo().getCategory() == ObjectInspector.Category.LIST);
         if (isList) {
-          objectTypeInfo = ((ListTypeInfo) objectTypeInfo)
-              .getListElementTypeInfo();
+          objectTypeInfo = ((ListTypeInfo) objectTypeInfo).getListElementTypeInfo();
         }
         if (objectTypeInfo.getCategory() != Category.STRUCT) {
           throw new SemanticException(ErrorMsg.INVALID_DOT.getMsg(expr));
         }
-        TypeInfo t = ((StructTypeInfo) objectTypeInfo)
-            .getStructFieldTypeInfo(fieldNameString);
+        TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString);
         if (isList) {
           t = TypeInfoFactory.getListTypeInfo(t);
         }
-        desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString,
-            isList);
-
+        desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, isList);
       } else if (funcText.equals("[")) {
         // "[]" : LSQUARE/INDEX Expression
+        if (!ctx.getAllowIndexExpr())
+          throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr));
+
         assert (children.size() == 2);
 
         // Check whether this is a list or a map
@@ -753,35 +783,29 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
         // Only allow integer index for now
         if (!(children.get(1) instanceof ExprNodeConstantDesc)
             || !(((ExprNodeConstantDesc) children.get(1)).getTypeInfo()
-            .equals(TypeInfoFactory.intTypeInfo))) {
-          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
-              expr,
-              ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg()));
+                .equals(TypeInfoFactory.intTypeInfo))) {
+          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+              ErrorMsg.INVALID_ARRAYINDEX_CONSTANT.getMsg()));
         }
 
         // Calculate TypeInfo
         TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo();
-        desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
-            .getGenericUDFForIndex(), children);
+        desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children);
       } else if (myt.getCategory() == Category.MAP) {
         // Only allow constant map key for now
         if (!(children.get(1) instanceof ExprNodeConstantDesc)) {
-          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
-              expr,
-              ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg()));
+          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+              ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg()));
         }
-        if (!(((ExprNodeConstantDesc) children.get(1)).getTypeInfo()
-            .equals(((MapTypeInfo) myt).getMapKeyTypeInfo()))) {
-          throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE
-              .getMsg(expr));
+        if (!(((ExprNodeConstantDesc) children.get(1)).getTypeInfo().equals(((MapTypeInfo) myt)
+            .getMapKeyTypeInfo()))) {
+          throw new SemanticException(ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(expr));
         }
 
         // Calculate TypeInfo
         TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo();
-        desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
-            .getGenericUDFForIndex(), children);
+        desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children);
       } else {
-        throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr,
-            myt.getTypeName()));
+        throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, myt.getTypeName()));
       }
     } else {
       // other operators or functions
@@ -833,26 +857,7 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
         }
       }
 
-      // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't
-      // supported
-      if (fi.getGenericUDTF() != null) {
-        throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg());
-      }
-      // UDAF in filter condition, group-by caluse, param of funtion, etc.
-      if (fi.getGenericUDAFResolver() != null) {
-        if (isFunction) {
-          throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.
-              getMsg((ASTNode) expr.getChild(0)));
-        } else {
-          throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr));
-        }
-      }
-      if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) {
-        if (FunctionRegistry.isStateful(genericUDF)) {
-          throw new SemanticException(
-              ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg());
-        }
-      }
+      validateUDF(expr, isFunction, ctx, fi, children, genericUDF);
 
       // Try to infer the type of the constant only if there are two
       // nodes, one of them is column and the other is numeric const
@@ -963,6 +968,24 @@ private boolean isDescendant(Node ans, Node des) {
       return false;
     }
 
+    protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr,
+        Object... nodeOutputs) throws SemanticException {
+      RowResolver input = ctx.getInputRR();
+      String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0)
+          .getText());
+      // NOTE: tableAlias must be a valid non-ambiguous table alias,
+      // because we've checked that in TOK_TABLE_OR_COL's process method.
+      ColumnInfo colInfo = input.get(tableAlias, ((ExprNodeConstantDesc) nodeOutputs[1]).getValue()
+          .toString());
+
+      if (colInfo == null) {
+        ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
+        return null;
+      }
+      return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(),
+          colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+    }
+
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
@@ -1012,7 +1035,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        * The difference is that there is translation for Window related tokens, so we just
        * return null;
        */
-      if ( windowingTokens.contains(expr.getType())) {
+      if (windowingTokens.contains(expr.getType())) {
+        if (!ctx.getAllowWindowing())
+          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+              ErrorMsg.INVALID_FUNCTION.getMsg("Windowing is not supported in the context")));
+
         return null;
       }
 
@@ -1021,6 +1048,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       }
 
       if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+        if (!ctx.getAllowAllColRef())
+          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+              ErrorMsg.INVALID_COLUMN
+                  .getMsg("All column reference is not supported in the context")));
+
         RowResolver input = ctx.getInputRR();
         ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc();
         assert expr.getChildCount() <= 1;
@@ -1058,22 +1090,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       if (expr.getType() == HiveParser.DOT
           && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
          && nodeOutputs[0] == null) {
-
-        RowResolver input = ctx.getInputRR();
-        String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr
-            .getChild(0).getChild(0).getText());
-        // NOTE: tableAlias must be a valid non-ambiguous table alias,
-        // because we've checked that in TOK_TABLE_OR_COL's process method.
-        ColumnInfo colInfo = input.get(tableAlias,
-            ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString());
-
-        if (colInfo == null) {
-          ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
-          return null;
-        }
-        return new ExprNodeColumnDesc(colInfo.getType(), colInfo
-            .getInternalName(), colInfo.getTabAlias(), colInfo
-            .getIsVirtualCol());
+        return processQualifiedColRef(ctx, expr, nodeOutputs);
       }
 
       // Return nulls for conversion operators
@@ -1088,7 +1105,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           expr.getType() == HiveParser.TOK_FUNCTIONSTAR ||
           expr.getType() == HiveParser.TOK_FUNCTIONDI);
 
-      if (!ctx.isAllowDistinctFunctions() && expr.getType() == HiveParser.TOK_FUNCTIONDI) {
+      if (!ctx.getAllowDistinctFunctions() && expr.getType() == HiveParser.TOK_FUNCTIONDI) {
         throw new SemanticException(
             SemanticAnalyzer.generateErrorMessage(expr, ErrorMsg.DISTINCT_NOT_SUPPORTED.getMsg()));
       }
@@ -1107,6 +1124,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       }
 
       if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) {
+        if (!ctx.getAllowFunctionStar())
+          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr,
+              ErrorMsg.INVALID_COLUMN
+                  .getMsg(".* reference is not supported in the context")));
+
         RowResolver input = ctx.getInputRR();
         for (ColumnInfo colInfo : input.getColumnInfos()) {
           if (!colInfo.getIsVirtualCol()) {
@@ -1119,8 +1141,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       // If any of the children contains null, then return a null
       // this is a hack for now to handle the group by case
       if (children.contains(null)) {
-        RowResolver input = ctx.getInputRR();
-        List<String> possibleColumnNames = input.getReferenceableColumnAliases(null, -1);
+        List<String> possibleColumnNames = getReferenceableColumnAliases(ctx);
         String reason = String.format("(possible column names are: %s)",
             StringUtils.join(possibleColumnNames, ", "));
         ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0), reason),
@@ -1143,6 +1164,9 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       }
     }
 
+    protected List<String> getReferenceableColumnAliases(TypeCheckCtx ctx) {
+      return ctx.getInputRR().getReferenceableColumnAliases(null, -1);
+    }
   }
 
   /**
@@ -1150,7 +1174,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    *
    * @return DefaultExprProcessor.
    */
-  public static DefaultExprProcessor getDefaultExprProcessor() {
+  public DefaultExprProcessor getDefaultExprProcessor() {
     return new DefaultExprProcessor();
   }
 
@@ -1168,13 +1192,18 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         return null;
       }
 
+      ASTNode expr = (ASTNode) nd;
+      ASTNode sqNode = (ASTNode) expr.getParent().getChild(1);
+
+      if (!ctx.getAllowSubQueryExpr())
+        throw new SemanticException(SemanticAnalyzer.generateErrorMessage(sqNode,
+            ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg()));
+
       ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx);
       if (desc != null) {
         return desc;
       }
 
-      ASTNode expr = (ASTNode) nd;
-      ASTNode sqNode = (ASTNode) expr.getParent().getChild(1);
       /*
       * Restriction.1.h :: SubQueries only supported in the SQL Where Clause.
       */
@@ -1190,7 +1219,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
   *
   * @return SubQueryExprProcessor.
   */
-  public static SubQueryExprProcessor getSubQueryExprProcessor() {
+  public SubQueryExprProcessor getSubQueryExprProcessor() {
     return new SubQueryExprProcessor();
   }
 }
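// ---------------------------------------------------------------------------
// Reviewer sketch (illustrative, not part of the patch): how the pieces above
// are expected to compose. The flag values below are assumptions chosen for
// the example, not defaults taken from any caller.
//
//   // A context that accepts everything except subquery expressions:
//   TypeCheckCtx ctx = new TypeCheckCtx(inputRR, false, true, true, true,
//       true, true, true, true, false);
//
//   // The public entry point walks the AST with the base factory's
//   // processors; a subclass passed through the protected overload swaps in
//   // its own. Gated constructs (windowing, TOK_ALLCOLREF, .*, field/index
//   // access, subqueries) now fail fast with a SemanticException instead of
//   // silently producing an unsupported expression tree.
//   Map<ASTNode, ExprNodeDesc> exprs = TypeCheckProcFactory.genExprNode(expr, ctx);
// ---------------------------------------------------------------------------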