Index: ql/src/test/results/clientnegative/expr_distributeby1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_distributeby1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_distributeby1.q.out (working copy) @@ -0,0 +1,57 @@ +PREHOOK: query: -- expressions are not allowed in distribute by without an alias +explain +select key, length(value) as foo from src distribute by key, foo +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in distribute by without an alias +explain +select key, length(value) as foo from src distribute by key, foo +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION length (TOK_TABLE_OR_COL value)) foo)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL foo)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: length(value) + type: int + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: int + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10138]: Expressions are not allowed in a distribute by clause. 
Use a column alias instead Index: ql/src/test/results/clientnegative/expr_sortby1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_sortby1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_sortby1.q.out (working copy) @@ -0,0 +1,57 @@ +PREHOOK: query: -- expressions are not allowed in sort by without an alias +explain +select key - 10 as foo, value from src sort by foo, value +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in sort by without an alias +explain +select key - 10 as foo, value from src sort by foo, value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key) 10) foo) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL foo)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: (key - 10) + type: double + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10140]: Expressions are not allowed in a sort by clause. 
Use a column alias instead Index: ql/src/test/results/clientnegative/expr_orderby1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_orderby1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_orderby1.q.out (working copy) @@ -0,0 +1,57 @@ +PREHOOK: query: -- expressions are not allowed in order by without an alias +explain +select length(value) as foo, src.key from src order by foo, src.key +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in order by without an alias +explain +select length(value) as foo, src.key from src order by foo, src.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION length (TOK_TABLE_OR_COL value)) foo) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL foo)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: length(value) + type: int + expr: key + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10139]: Expressions are not allowed in an order by clause. 
Use a column alias instead Index: ql/src/test/results/clientnegative/expr_clusterby1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_clusterby1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_clusterby1.q.out (working copy) @@ -0,0 +1,62 @@ +PREHOOK: query: -- expressions are not allowed in cluster by without an alias +explain +select key + key as foo, src.value from src cluster by foo, src.value +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in cluster by without an alias +explain +select key + key as foo, src.value from src cluster by foo, src.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)) foo) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL foo) (. (TOK_TABLE_OR_COL src) value)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: (key + key) + type: double + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: double + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10137]: Expressions are not allowed in a 
cluster by clause. Use a column alias instead Index: ql/src/test/results/clientnegative/expr_distributeby_sortby_1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_distributeby_sortby_1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_distributeby_sortby_1.q.out (working copy) @@ -0,0 +1,60 @@ +PREHOOK: query: -- expressions are not allowed in a distribute by or a sort by clause +-- without an alias +explain +select key + key as foo, src.value from src distribute by foo sort by src.value +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in a distribute by or a sort by clause +-- without an alias +explain +select key + key as foo, src.value from src distribute by foo sort by src.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)) foo) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL foo)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL src) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: (key + key) + type: double + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: double + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10138]: Expressions are not allowed in a distribute by clause. Use a column alias instead Index: ql/src/test/queries/clientnegative/expr_clusterby1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_clusterby1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_clusterby1.q (working copy) @@ -0,0 +1,6 @@ +-- expressions are not allowed in cluster by without an alias +explain +select key + key as foo, src.value from src cluster by foo, src.value; + +explain +select key + key, src.value from src cluster by key + key, src.value; Index: ql/src/test/queries/clientnegative/expr_distributeby_sortby_1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_distributeby_sortby_1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_distributeby_sortby_1.q (working copy) @@ -0,0 +1,7 @@ +-- expressions are not allowed in a distribute by or a sort by clause +-- without an alias +explain +select key + key as foo, src.value from src distribute by foo sort 
by src.value; + +explain +select key + key as foo, src.value from src distribute by key + key sort by src.value; Index: ql/src/test/queries/clientnegative/expr_orderby1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_orderby1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_orderby1.q (working copy) @@ -0,0 +1,6 @@ +-- expressions are not allowed in order by without an alias +explain +select length(value) as foo, src.key from src order by foo, src.key; + +explain +select length(value), key from src order by length(value), key; Index: ql/src/test/queries/clientnegative/expr_distributeby1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_distributeby1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_distributeby1.q (working copy) @@ -0,0 +1,6 @@ +-- expressions are not allowed in distribute by without an alias +explain +select key, length(value) as foo from src distribute by key, foo; + +explain +select key, length(value) from src distribute by key, length(value); Index: ql/src/test/queries/clientnegative/expr_sortby1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_sortby1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_sortby1.q (working copy) @@ -0,0 +1,6 @@ +-- expressions are not allowed in sort by without an alias +explain +select key - 10 as foo, value from src sort by foo, value; + +explain +select key - 10, value from src sort by key - 10, value; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1370121) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -113,6 +113,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType; +import org.apache.hadoop.hive.ql.parse.QBParseInfo.ClauseType; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc; @@ -2027,7 +2028,7 @@ } //if specified generate alias using func name - if(includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)){ + if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) { String expr_flattened = root.toStringTree(); @@ -6144,6 +6145,93 @@ return curr; } + // Expressions are not allowed currently in cluster/distribute/order/sort by. + // It would be good to support them in the future, but till then it is better + // to throw a good semantic error instead of some cryptic error. + private void checkExpression(ASTNode input, + ClauseType clauseType) throws SemanticException { + int childCount = input.getChildCount(); + + // Columns can only exist at the top + if (input.getType() == HiveParser.TOK_TABLE_OR_COL) { + switch (clauseType) { + case CLUSTER_BY_CLAUSE: + throw new + SemanticException(ErrorMsg.EXPRESSIONS_NOT_ALLOWED_CLUSTERBY.getMsg()); + case DISTRIBUTE_BY_CLAUSE: + throw new + SemanticException(ErrorMsg.EXPRESSIONS_NOT_ALLOWED_DISTRIBUTEBY.getMsg()); + case ORDER_BY_CLAUSE: + throw new + SemanticException(ErrorMsg.EXPRESSIONS_NOT_ALLOWED_ORDERBY.getMsg()); + case SORT_BY_CLAUSE: + throw new + SemanticException(ErrorMsg.EXPRESSIONS_NOT_ALLOWED_SORTBY.getMsg()); + } + } + + if (childCount > 0) { + for (int pos = 0; pos < childCount; pos++) { + ASTNode exprChild = (ASTNode) input.getChild(pos); + checkExpression(exprChild, clauseType); + } + } + } + + private void validateExpressionSkipParent(ASTNode inputExpr, + ClauseType clauseType) throws SemanticException { + int childCount = 
inputExpr.getChildCount(); + if (childCount > 0) { + for (int pos = 0; pos < childCount; pos++) { + checkExpression((ASTNode)inputExpr.getChild(pos), clauseType); + } + } + } + + private void validateExpressionHandleTableQualifier(ASTNode inputExpr, + ClauseType clauseType) throws SemanticException { + // If the expression is tab.column, go to the columns + // Same for value[3] + if ((inputExpr.getType() == HiveParser.DOT) || + (inputExpr.getType() == HiveParser.LSQUARE)) { + for (int pos = 0; pos < inputExpr.getChildCount(); pos++) { + validateExpressionHandleTableQualifier((ASTNode)inputExpr.getChild(pos), clauseType); + } + } else { + validateExpressionSkipParent(inputExpr, clauseType); + } + } + + // Validate that the expression only consists of constants and columns. + // Expressions are not allowed in the cluster/distribute/order/sort by list + private void validateExpression(ASTNode expr, + ClauseType clauseType) throws SemanticException { + + boolean isGrandChild = true; + // The first level of children is whether it is ascending/descending + // for order by and sort by + if ((clauseType == ClauseType.DISTRIBUTE_BY_CLAUSE) || + (clauseType == ClauseType.CLUSTER_BY_CLAUSE)) { + isGrandChild = false; + } + + int ccount = expr.getChildCount(); + for (int i = 0; i < ccount; ++i) { + ASTNode cl = (ASTNode) expr.getChild(i); + if (isGrandChild == false) { + validateExpressionHandleTableQualifier(cl, clauseType); + } else { + int grandChildCount = cl.getChildCount(); + if (grandChildCount > 0) { + for (int childPos = 0; childPos < grandChildCount; childPos++) { + validateExpressionHandleTableQualifier( + (ASTNode)cl.getChild(childPos), clauseType); + } + } + } + } + } + private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb) throws SemanticException { @@ -6160,11 +6248,40 @@ curr = genSelectPlan(dest, qb, curr); Integer limit = qbp.getDestLimit(dest); - if (qbp.getClusterByForClause(dest) != null - || qbp.getDistributeByForClause(dest) != null - 
|| qbp.getOrderByForClause(dest) != null - || qbp.getSortByForClause(dest) != null) { + // Expressions are not supported currently without an alias. + // Reduce sink is needed if the query contains a cluster by, distribute by, + // order by or a sort by clause. + boolean genReduceSink = false; + + // Currently, expressions are not allowed in cluster by, distribute by, + // order by or a sort by clause. For each of the above clause types, check + // if the clause contains any expression. + if (qbp.getClusterByForClause(dest) != null) { + validateExpression(qbp.getClusterByForClause(dest), + ClauseType.CLUSTER_BY_CLAUSE); + genReduceSink = true; + } + + if (qbp.getDistributeByForClause(dest) != null) { + validateExpression(qbp.getDistributeByForClause(dest), + ClauseType.DISTRIBUTE_BY_CLAUSE); + genReduceSink = true; + } + + if (qbp.getOrderByForClause(dest) != null) { + validateExpression(qbp.getOrderByForClause(dest), + ClauseType.ORDER_BY_CLAUSE); + genReduceSink = true; + } + + if (qbp.getSortByForClause(dest) != null) { + validateExpression(qbp.getSortByForClause(dest), + ClauseType.SORT_BY_CLAUSE); + genReduceSink = true; + } + + if (genReduceSink) { int numReducers = -1; // Use only 1 reducer if order by is present Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (revision 1370121) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (working copy) @@ -507,4 +507,10 @@ return nameToSample; } + protected static enum ClauseType { + CLUSTER_BY_CLAUSE, + DISTRIBUTE_BY_CLAUSE, + ORDER_BY_CLAUSE, + SORT_BY_CLAUSE + } } Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1370121) +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy) @@ -237,6 +237,15 
@@ "If you really want to perform the operation, either remove the " + "mapjoin hint from your query or set hive.enforce.bucketmapjoin to false."), + EXPRESSIONS_NOT_ALLOWED_CLUSTERBY(10137, + "Expressions are not allowed in a cluster by clause. Use a column alias instead"), + EXPRESSIONS_NOT_ALLOWED_DISTRIBUTEBY(10138, + "Expressions are not allowed in a distribute by clause. Use a column alias instead"), + EXPRESSIONS_NOT_ALLOWED_ORDERBY(10139, + "Expressions are not allowed in an order by clause. Use a column alias instead"), + EXPRESSIONS_NOT_ALLOWED_SORTBY(10140, + "Expressions are not allowed in a sort by clause. Use a column alias instead"), + SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."), SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. " + "It may have crashed with an error."),