Index: ql/src/test/results/clientnegative/expr_distributeby1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_distributeby1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_distributeby1.q.out (working copy) @@ -0,0 +1,57 @@ +PREHOOK: query: -- expressions are not allowed in distribute by without an alias +explain +select key, length(value) as foo from src distribute by key, foo +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in distribute by without an alias +explain +select key, length(value) as foo from src distribute by key, foo +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION length (TOK_TABLE_OR_COL value)) foo)) (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL foo)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: length(value) + type: int + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: int + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10133]: Expressions are not allowed in a distribute by clause. 
Use a column alias instead Index: ql/src/test/results/clientnegative/expr_sortby1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_sortby1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_sortby1.q.out (working copy) @@ -0,0 +1,57 @@ +PREHOOK: query: -- expressions are not allowed in sort by without an alias +explain +select key - 10 as foo, value from src sort by foo, value +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in sort by without an alias +explain +select key - 10 as foo, value from src sort by foo, value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key) 10) foo) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL foo)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: (key - 10) + type: double + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10135]: Expressions are not allowed in a sort by clause. 
Use a column alias instead Index: ql/src/test/results/clientnegative/expr_orderby1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_orderby1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_orderby1.q.out (working copy) @@ -0,0 +1,57 @@ +PREHOOK: query: -- expressions are not allowed in order by without an alias +explain +select length(value) as foo, src.key from src order by foo, src.key +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in order by without an alias +explain +select length(value) as foo, src.key from src order by foo, src.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION length (TOK_TABLE_OR_COL value)) foo) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL foo)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: length(value) + type: int + expr: key + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10134]: Expressions are not allowed in an order by clause. 
Use a column alias instead Index: ql/src/test/results/clientnegative/expr_clusterby1.q.out =================================================================== --- ql/src/test/results/clientnegative/expr_clusterby1.q.out (revision 0) +++ ql/src/test/results/clientnegative/expr_clusterby1.q.out (working copy) @@ -0,0 +1,62 @@ +PREHOOK: query: -- expressions are not allowed in cluster by without an alias +explain +select key + key as foo, src.value from src cluster by foo, src.value +PREHOOK: type: QUERY +POSTHOOK: query: -- expressions are not allowed in cluster by without an alias +explain +select key + key as foo, src.value from src cluster by foo, src.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)) foo) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_CLUSTERBY (TOK_TABLE_OR_COL foo) (. (TOK_TABLE_OR_COL src) value)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: (key + key) + type: double + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: double + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +FAILED: SemanticException [Error 10132]: Expressions are not allowed in a 
cluster by clause. Use a column alias instead Index: ql/src/test/queries/clientnegative/expr_clusterby1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_clusterby1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_clusterby1.q (working copy) @@ -0,0 +1,6 @@ +-- expressions are not allowed in cluster by without an alias +explain +select key + key as foo, src.value from src cluster by foo, src.value; + +explain +select key + key, src.value from src cluster by key + key, src.value; Index: ql/src/test/queries/clientnegative/expr_orderby1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_orderby1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_orderby1.q (working copy) @@ -0,0 +1,6 @@ +-- expressions are not allowed in order by without an alias +explain +select length(value) as foo, src.key from src order by foo, src.key; + +explain +select length(value), key from src order by length(value), key; Index: ql/src/test/queries/clientnegative/expr_distributeby1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_distributeby1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_distributeby1.q (working copy) @@ -0,0 +1,6 @@ +-- expressions are not allowed in distribute by without an alias +explain +select key, length(value) as foo from src distribute by key, foo; + +explain +select key, length(value) from src distribute by key, length(value); Index: ql/src/test/queries/clientnegative/expr_sortby1.q =================================================================== --- ql/src/test/queries/clientnegative/expr_sortby1.q (revision 0) +++ ql/src/test/queries/clientnegative/expr_sortby1.q (working copy) @@ -0,0 +1,6 @@ +-- expressions are not allowed in sort by without an alias +explain +select key - 10 as foo, value from src sort by foo, value; + +explain +select 
key - 10, value from src sort by key - 10, value; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1366288) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -2070,7 +2070,7 @@ } //if specified generate alias using func name - if(includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)){ + if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) { String expr_flattened = root.toStringTree(); @@ -6187,6 +6187,100 @@ return curr; } + public static enum ExpressionType { + CLUSTER_BY_CLAUSE, + DISTRIBUTE_BY_CLAUSE, + ORDER_BY_CLAUSE, + SORT_BY_CLAUSE + } + + // Expressions are not allowed in the cluster/distribute/order/sort by list + private void checkExpression(ASTNode input, + ExpressionType expressionType) throws SemanticException { + int childCount = input.getChildCount(); + + // Columns can only exist at the top + if (input.getType() == HiveParser.TOK_TABLE_OR_COL) { + switch (expressionType) { + case CLUSTER_BY_CLAUSE: + throw new + SemanticException(ErrorMsg.EXPRESSIONS_NOT_ALLOWED_CLUSTERBY.getMsg()); + case DISTRIBUTE_BY_CLAUSE: + throw new + SemanticException(ErrorMsg.EXPRESSIONS_NOT_ALLOWED_DISTRIBUTEBY.getMsg()); + case ORDER_BY_CLAUSE: + throw new + SemanticException(ErrorMsg.EXPRESSIONS_NOT_ALLOWED_ORDERBY.getMsg()); + case SORT_BY_CLAUSE: + throw new + SemanticException(ErrorMsg.EXPRESSIONS_NOT_ALLOWED_SORTBY.getMsg()); + } + } + + if (childCount > 0) { + for (int pos = 0; pos < childCount; pos++) { + ASTNode exprChild = (ASTNode) input.getChild(pos); + checkExpression(exprChild, expressionType); + } + } + } + + private void validateExpressionSkipParent(ASTNode inputExpr, + ExpressionType expressionType) throws SemanticException { + int childCount = inputExpr.getChildCount(); + if (childCount > 0) { + for (int pos = 0; pos < 
childCount; pos++) { + checkExpression((ASTNode)inputExpr.getChild(pos), expressionType); + } + } + } + + private void validateExpressionHandleTableQualifier(ASTNode inputExpr, + ExpressionType expressionType) throws SemanticException { + // If the expression is tab.column, go to the columns + // Same for value[3] + if ((inputExpr.getType() == HiveParser.DOT) || + (inputExpr.getType() == HiveParser.LSQUARE)) { + for (int pos = 0; pos < inputExpr.getChildCount(); pos++) { + validateExpressionHandleTableQualifier((ASTNode)inputExpr.getChild(pos), expressionType); + } + } + else { + validateExpressionSkipParent(inputExpr, expressionType); + } + } + + // Validate that the expression only consists of constants and columns. + // Expressions are not allowed in the cluster/distribute/order/sort by list + private void validateExpression(ASTNode expr, + ExpressionType expressionType) throws SemanticException { + + boolean grandChild = true; + // The first level of children is whether it is ascending/descending + // for order by and sort by + if ((expressionType == ExpressionType.DISTRIBUTE_BY_CLAUSE) || + (expressionType == ExpressionType.CLUSTER_BY_CLAUSE)) { + grandChild = false; + } + + int ccount = expr.getChildCount(); + for (int i = 0; i < ccount; ++i) { + ASTNode cl = (ASTNode) expr.getChild(i); + if (grandChild == false) { + validateExpressionHandleTableQualifier(cl, expressionType); + } + else { + int grandChildCount = cl.getChildCount(); + if (grandChildCount > 0) { + for (int childPos = 0; childPos < grandChildCount; childPos++) { + validateExpressionHandleTableQualifier( + (ASTNode)cl.getChild(childPos), expressionType); + } + } + } + } + } + private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb) throws SemanticException { @@ -6203,11 +6297,30 @@ curr = genSelectPlan(dest, qb, curr); Integer limit = qbp.getDestLimit(dest); - if (qbp.getClusterByForClause(dest) != null - || qbp.getDistributeByForClause(dest) != null - || 
qbp.getOrderByForClause(dest) != null - || qbp.getSortByForClause(dest) != null) { + // Expressions are not supported currently without an alias. + ASTNode checkExpr = null; + ExpressionType expressionType = null; + if (qbp.getClusterByForClause(dest) != null) { + checkExpr = qbp.getClusterByForClause(dest); + expressionType = ExpressionType.CLUSTER_BY_CLAUSE; + } + else if (qbp.getDistributeByForClause(dest) != null) { + checkExpr = qbp.getDistributeByForClause(dest); + expressionType = ExpressionType.DISTRIBUTE_BY_CLAUSE; + } + else if (qbp.getOrderByForClause(dest) != null) { + checkExpr = qbp.getOrderByForClause(dest); + expressionType = ExpressionType.ORDER_BY_CLAUSE; + } + else if (qbp.getSortByForClause(dest) != null) { + checkExpr = qbp.getSortByForClause(dest); + expressionType = ExpressionType.SORT_BY_CLAUSE; + } + + if (checkExpr != null) { + validateExpression(checkExpr, expressionType); + int numReducers = -1; // Use only 1 reducer if order by is present Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1366288) +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy) @@ -220,6 +220,14 @@ "partitioned table is not allowed. It may lead to wrong results for " + "older partitions"), + EXPRESSIONS_NOT_ALLOWED_CLUSTERBY(10132, + "Expressions are not allowed in a cluster by clause. Use a column alias instead"), + EXPRESSIONS_NOT_ALLOWED_DISTRIBUTEBY(10133, + "Expressions are not allowed in a distribute by clause. Use a column alias instead"), + EXPRESSIONS_NOT_ALLOWED_ORDERBY(10134, + "Expressions are not allowed in an order by clause. Use a column alias instead"), + EXPRESSIONS_NOT_ALLOWED_SORTBY(10135, + "Expressions are not allowed in a sort by clause. 
Use a column alias instead"), SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."), SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "