Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 1203044)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -27,12 +27,13 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.Map.Entry;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
+import org.antlr.runtime.tree.Tree;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.Path;
@@ -48,6 +49,7 @@
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.ExecDriver;
@@ -58,7 +60,6 @@
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapRedTask;
-import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.RecordReader;
@@ -95,7 +96,6 @@
 import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1;
 import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
@@ -105,6 +105,7 @@
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
+import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -125,7 +126,6 @@
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 import org.apache.hadoop.hive.ql.plan.ForwardDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
@@ -148,12 +148,13 @@
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.UDTFDesc;
 import org.apache.hadoop.hive.ql.plan.UnionDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
@@ -161,9 +162,9 @@
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -212,6 +213,8 @@
   //Max characters when auto generating the column name with func name
   private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
 
+  private static final String DELIMITER = "_";
+
   public static class GlobalLimitCtx {
     private boolean enable = false;
     private int globalLimit = -1;
@@ -2223,6 +2226,14 @@
     }
 
     if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+      QBParseInfo qbp = qb.getParseInfo();
+      TreeSet<String> ks = new TreeSet<String>(qbp.getClauseNames());
+      // If the query is a group-by query, validate select * for each clause.
+      if (!qbp.getDestToGroupBy().isEmpty()) {
+        for (String clause : ks) {
+          validateGroupBy(qb, expr, clause);
+        }
+      }
       pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
           : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
           expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
@@ -2301,6 +2312,102 @@
     return output;
   }
 
+  private void validateGroupBy(QB qb, ASTNode astNode, String clause)
+      throws SemanticException {
+
+    List<String> fieldNameValues = constructSelCols(qb, astNode, clause);
+    Set<String> groupByNames = constructGroupByCols(qb, clause);
+
+    // Validate the select clause columns against those in the group by clause.
+    for (String fieldNameValue : fieldNameValues) {
+      if (!groupByNames.contains(fieldNameValue)) {
+        throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(fieldNameValue));
+      }
+    }
+  }
+
+  private Set<String> constructGroupByCols(QB qb, String clause) {
+    ASTNode groupByNode = qb.getParseInfo().getDestToGroupBy().get(clause);
+    int groupByElementsCount = groupByNode.getChildCount();
+    Set<String> groupByNames = new HashSet<String>(groupByElementsCount, 1.0f);
+    for (int i = 0; i < groupByElementsCount; i++) {
+      Tree node = groupByNode.getChild(i);
+      int childCount = node.getChildCount();
+      if (childCount == 2) {
+        groupByNames.add(node.getChild(0).getChild(0).getText()
+            + DELIMITER + node.getChild(1).getText());
+      } else if (childCount == 1) {
+        if (!qb.getTabAliases().isEmpty()) {
+          groupByNames.add(qb.getTabAliases().iterator().next()
+              + DELIMITER + node.getChild(0).getText());
+        } else {
+          groupByNames.add(node.getChild(0).getText());
+        }
+      }
+    }
+    return groupByNames;
+  }
+
+  private List<String> constructSelCols(QB qb, ASTNode astNode, String clause) {
+    List<String> fieldNameValues = new ArrayList<String>();
+    // Get the column names, prefixed with the table alias where one exists.
+    if (astNode.getChildCount() == 0) {
+      // Get the column names for queries of the form
+      // select * from table group by x;
+      if (!qb.getTabAliases().isEmpty()) {
+        for (String alias : qb.getTabAliases()) {
+          Table srcForAlias = qb.getMetaData().getSrcForAlias(alias);
+          if (null != srcForAlias) {
+            ArrayList<StructField> fields = srcForAlias.getFields();
+            for (StructField structField : fields) {
+              fieldNameValues.add(alias + DELIMITER + structField.getFieldName());
+            }
+          }
+        }
+      } else if (!qb.getSubqAliases().isEmpty()) {
+        // Get the column names from the subquery, e.g.
+        // select * from (select a,b from test1) f group by a,b;
+        QBExpr subqForAlias = qb.getSubqForAlias(qb.getAliases().get(0));
+        if (subqForAlias != null) {
+          ASTNode selForClause = subqForAlias.getQB().getParseInfo().getSelForClause(clause);
+          if (null != selForClause) {
+            ArrayList<Node> children = selForClause.getChildren();
+            for (Node node : children) {
+              fieldNameValues.add(node.getChildren().get(0).getChildren().get(0).toString());
+            }
+          }
+        }
+      }
+    } else {
+      // Get the column names for queries of the form
+      // select a.* from table a group by a.x;
+      int nodeCount = astNode.getChildCount();
+      for (int i = 0; i < nodeCount; i++) {
+        String alias = astNode.getChild(i).getChild(0).getText();
+        Table srcForAlias = qb.getMetaData().getSrcForAlias(alias);
+        if (null != srcForAlias) {
+          ArrayList<StructField> fields = srcForAlias.getFields();
+          for (StructField structField : fields) {
+            fieldNameValues.add(alias + DELIMITER + structField.getFieldName());
+          }
+        } else if (qb.getSubqForAlias(alias) != null) {
+          // Get the column names from the subquery, e.g.
+          // select f.* from (select a,b from test1) f group by f.a,f.b;
+          QBExpr subqForAlias = qb.getSubqForAlias(alias);
+          ASTNode selForClause = subqForAlias.getQB().getParseInfo().getSelForClause(clause);
+          if (null != selForClause) {
+            ArrayList<Node> children = selForClause.getChildren();
+            for (Node node : children) {
+              fieldNameValues.add(alias + DELIMITER
+                  + node.getChildren().get(0).getChildren().get(0).toString());
+            }
+          }
+        }
+      }
+    }
+    return fieldNameValues;
+  }
+
   /**
    * Class to store GenericUDAF related information.
    */
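Taken together, the three new methods implement a simple membership check: expand select * into alias-qualified names (alias + "_" + column) and require every expanded name to appear among the group-by keys. The following is a minimal, self-contained sketch of that check, not code from the patch; the class name, alias, and columns are hypothetical.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Illustrative sketch of the validateGroupBy()/constructSelCols() logic;
// not part of the patch. Table and column names are hypothetical.
public class SelectStarGroupByCheck {
  private static final String DELIMITER = "_";

  // Qualify each column of the expanded "select *" with its table alias,
  // the same way constructSelCols() builds its fieldNameValues list.
  static List<String> expandSelectStar(String alias, List<String> columns) {
    List<String> qualified = new ArrayList<String>();
    for (String column : columns) {
      qualified.add(alias + DELIMITER + column);
    }
    return qualified;
  }

  public static void main(String[] args) {
    // SELECT * FROM dest1 GROUP BY key, c1;  -- c2 is selected but not grouped.
    List<String> selected = expandSelectStar("dest1", Arrays.asList("key", "c1", "c2"));
    Set<String> groupByKeys = new HashSet<String>(Arrays.asList("dest1_key", "dest1_c1"));
    for (String column : selected) {
      if (!groupByKeys.contains(column)) {
        // validateGroupBy() throws a SemanticException at this point.
        System.out.println("Expression not in GROUP BY key " + column);
      }
    }
  }
}

Running the sketch prints "Expression not in GROUP BY key dest1_c2", the same key the first negative test below expects in its error message.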
Index: ql/src/test/queries/clientnegative/groupby_select_all.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_select_all.q	(revision 0)
+++ ql/src/test/queries/clientnegative/groupby_select_all.q	(revision 0)
@@ -0,0 +1,9 @@
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE;
+
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+SELECT * FROM dest1 GROUP BY key,c1;
Index: ql/src/test/queries/clientnegative/groupby_select_all_alias.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_select_all_alias.q	(revision 0)
+++ ql/src/test/queries/clientnegative/groupby_select_all_alias.q	(revision 0)
@@ -0,0 +1,9 @@
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE;
+
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+SELECT * FROM dest1 ali GROUP BY ali.key,ali.c1;
Index: ql/src/test/queries/clientnegative/groupby_select_all_join.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_select_all_join.q	(revision 0)
+++ ql/src/test/queries/clientnegative/groupby_select_all_join.q	(revision 0)
@@ -0,0 +1,9 @@
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE;
+
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+select * from dest1 x join dest1 y on (x.key=y.key) group by x.key,x.c1,x.c2,y.c2,y.key;
Index: ql/src/test/queries/clientnegative/groupby_select_all_subquery.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_select_all_subquery.q	(revision 0)
+++ ql/src/test/queries/clientnegative/groupby_select_all_subquery.q	(revision 0)
@@ -0,0 +1,9 @@
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE;
+
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+select * from (select key,c1 from dest1) x group by key;
Index: ql/src/test/queries/clientnegative/groupby_select_all_subquery_unionall.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_select_all_subquery_unionall.q	(revision 0)
+++ ql/src/test/queries/clientnegative/groupby_select_all_subquery_unionall.q	(revision 0)
@@ -0,0 +1,9 @@
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE;
+
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+select * from (select key, c1 from (select key, c1 from dest1 c union all select key, c1 from dest1 d) b) a group by key having key=1;
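Each negative test above omits exactly one expanded column from its GROUP BY clause. In the join case both aliases are expanded, so select * yields x_key, x_c1, x_c2, y_key, y_c1, y_c2, and the single name missing from the group-by set is y_c1. A minimal sketch of that lookup, illustrative only (the class name is hypothetical):

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Illustrative only: why groupby_select_all_join.q fails with "y_c1".
public class JoinSelectStarExample {
  public static void main(String[] args) {
    // "select *" over the join expands both aliases' columns, qualified.
    List<String> expanded = Arrays.asList(
        "x_key", "x_c1", "x_c2", "y_key", "y_c1", "y_c2");
    // group by x.key, x.c1, x.c2, y.c2, y.key -- y.c1 is missing.
    Set<String> groupBy = new HashSet<String>(Arrays.asList(
        "x_key", "x_c1", "x_c2", "y_c2", "y_key"));
    for (String column : expanded) {
      if (!groupBy.contains(column)) {
        System.out.println("Expression not in GROUP BY key " + column); // y_c1
      }
    }
  }
}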
Index: ql/src/test/queries/clientpositive/groupby_select_all.q
===================================================================
--- ql/src/test/queries/clientpositive/groupby_select_all.q	(revision 0)
+++ ql/src/test/queries/clientpositive/groupby_select_all.q	(revision 0)
@@ -0,0 +1,34 @@
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE;
+
+EXPLAIN EXTENDED
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+EXPLAIN SELECT * FROM dest1 GROUP BY key,c1,c2;
+
+EXPLAIN SELECT * FROM dest1 ali GROUP BY ali.key,ali.c1,ali.c2;
+
+EXPLAIN select * from dest1 x join dest1 y on (x.key=y.key) group by x.key,x.c1,x.c2,y.key,y.c1,y.c2;
+
+EXPLAIN select * from (select key,c1 from dest1) x group by key,c1;
+
+EXPLAIN select * from (select key, c1 from (select key, c1 from dest1 c union all select key, c1 from dest1 d) b) a group by key, c1 having key=1;
+
+SELECT * FROM dest1 GROUP BY key,c1,c2;
+
+SELECT * FROM dest1 ali GROUP BY ali.key,ali.c1,ali.c2;
+
+select * from dest1 x join dest1 y on (x.key=y.key) group by x.key,x.c1,x.c2,y.key,y.c1,y.c2;
+
+select * from (select key,c1 from dest1) x group by key,c1;
+
+select * from (select key, c1 from (select key, c1 from dest1 c union all select key, c1 from dest1 d) b) a group by key, c1 having key=1;
Index: ql/src/test/results/clientnegative/groupby_select_all.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_select_all.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/groupby_select_all.q.out	(revision 0)
@@ -0,0 +1,27 @@
+PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ]
+FAILED: Error in semantic analysis: Expression not in GROUP BY key dest1_c2
Index: ql/src/test/results/clientnegative/groupby_select_all_alias.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_select_all_alias.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/groupby_select_all_alias.q.out	(revision 0)
@@ -0,0 +1,27 @@
+PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ]
+FAILED: Error in semantic analysis: Expression not in GROUP BY key ali_c2
Index: ql/src/test/results/clientnegative/groupby_select_all_join.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_select_all_join.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/groupby_select_all_join.q.out	(revision 0)
@@ -0,0 +1,27 @@
+PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ]
+FAILED: Error in semantic analysis: Expression not in GROUP BY key y_c1
Index: ql/src/test/results/clientnegative/groupby_select_all_subquery.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_select_all_subquery.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/groupby_select_all_subquery.q.out	(revision 0)
@@ -0,0 +1,27 @@
+PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ]
+FAILED: Error in semantic analysis: Expression not in GROUP BY key c1
Index: ql/src/test/results/clientnegative/groupby_select_all_subquery_unionall.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_select_all_subquery_unionall.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/groupby_select_all_subquery_unionall.q.out	(revision 0)
@@ -0,0 +1,27 @@
+PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ]
+FAILED: Error in semantic analysis: Expression not in GROUP BY key c1
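In the two subquery results above, the reported key is a bare c1 with no alias prefix: when the source is a subquery, constructSelCols() reads the column tokens straight from the inner select list's TOK_SELEXPR nodes, and constructGroupByCols() likewise adds unqualified names when there is no table alias. A minimal sketch under those assumptions (the class name is hypothetical):

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Illustrative only: select * from (select key, c1 from dest1) x group by key;
// The inner select list yields unqualified names, hence the bare "c1" key.
public class SubquerySelectStarExample {
  public static void main(String[] args) {
    // Token texts of (TOK_SELEXPR (TOK_TABLE_OR_COL key)) etc. in the subquery.
    List<String> innerSelect = Arrays.asList("key", "c1");
    Set<String> groupBy = new HashSet<String>(Arrays.asList("key"));
    for (String column : innerSelect) {
      if (!groupBy.contains(column)) {
        System.out.println("Expression not in GROUP BY key " + column); // c1
      }
    }
  }
}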
Index: ql/src/test/results/clientpositive/groupby_select_all.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_select_all.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/groupby_select_all.q.out	(revision 0)
@@ -0,0 +1,959 @@
+PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN EXTENDED
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5)))
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_GROUPBY (TOK_FUNCTION substr (.
(TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + Needs Tagging: false + Path -> Alias: + pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [src] + pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [src] + Path -> Partition: + pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1321989706 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1321989703 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location 
pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1321989706 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1321989703 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: pfile:/home/opensrc/18nov/workspace/build/ql/scratchdir/hive_2011-11-22_11-21-55_108_4152156565914556125/-ext-10000 + NumFilesPerFileSink: 1 + Stats Publishing Key Prefix: pfile:/home/opensrc/18nov/workspace/build/ql/scratchdir/hive_2011-11-22_11-21-55_108_4152156565914556125/-ext-10000/ + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2 + columns.types string:int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1321989715 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: pfile:/home/opensrc/18nov/workspace/build/ql/scratchdir/hive_2011-11-22_11-21-55_108_4152156565914556125/-ext-10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2 + columns.types string:int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location 
pfile:/home/opensrc/18nov/workspace/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1321989715 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + tmp directory: pfile:/home/opensrc/18nov/workspace/build/ql/scratchdir/hive_2011-11-22_11-21-55_108_4152156565914556125/-ext-10001 + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: pfile:/home/opensrc/18nov/workspace/build/ql/scratchdir/hive_2011-11-22_11-21-55_108_4152156565914556125/-ext-10000/ + + +PREHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: EXPLAIN SELECT * FROM dest1 GROUP BY key,c1,c2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * FROM dest1 GROUP BY key,c1,c2 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL c1) (TOK_TABLE_OR_COL c2)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + dest1 + TableScan + alias: dest1 + Select Operator + expressions: + expr: key + type: string + expr: c1 + type: int + expr: c2 + type: string + outputColumnNames: key, c1, c2 + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: c1 + type: int + expr: c2 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + tag: -1 + Reduce Operator Tree: + Group By 
Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: EXPLAIN SELECT * FROM dest1 ali GROUP BY ali.key,ali.c1,ali.c2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * FROM dest1 ali GROUP BY ali.key,ali.c1,ali.c2 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1) ali)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL ali) key) (. (TOK_TABLE_OR_COL ali) c1) (. (TOK_TABLE_OR_COL ali) c2)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + ali + TableScan + alias: ali + Select Operator + expressions: + expr: key + type: string + expr: c1 + type: int + expr: c2 + type: string + outputColumnNames: key, c1, c2 + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: c1 + type: int + expr: c2 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: EXPLAIN select * from dest1 x join dest1 y on (x.key=y.key) group by x.key,x.c1,x.c2,y.key,y.c1,y.c2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * from dest1 x join dest1 y on (x.key=y.key) group by x.key,x.c1,x.c2,y.key,y.c1,y.c2 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION 
[(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME dest1) x) (TOK_TABREF (TOK_TABNAME dest1) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) c1) (. (TOK_TABLE_OR_COL x) c2) (. (TOK_TABLE_OR_COL y) key) (. (TOK_TABLE_OR_COL y) c1) (. (TOK_TABLE_OR_COL y) c2)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + x + TableScan + alias: x + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: c1 + type: int + expr: c2 + type: string + y + TableScan + alias: y + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + expr: c1 + type: int + expr: c2 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col5 + type: string + expr: _col6 + type: int + expr: _col7 + type: string + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col5 + type: string + expr: _col6 + type: int + expr: _col7 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/tmp/root/hive_2011-11-22_11-22-11_401_7210894970782461119/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + sort order: ++++++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + expr: KEY._col3 + type: string + expr: KEY._col4 + type: int + expr: KEY._col5 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + 
compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: EXPLAIN select * from (select key,c1 from dest1) x group by key,c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * from (select key,c1 from dest1) x group by key,c1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL c1))))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL c1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + x:dest1 + TableScan + alias: dest1 + Select Operator + expressions: + expr: key + type: string + expr: c1 + type: int + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col1 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: EXPLAIN select * from (select key, c1 from (select key, c1 from dest1 c union all select key, c1 from dest1 d) b) a group by key, c1 having key=1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select * from (select key, c1 from (select key, c1 from dest1 c union all select key, c1 from dest1 d) b) a group by key, c1 having key=1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1) c)) 
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL c1))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dest1) d)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL c1)))))) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL c1))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL c1)) (TOK_HAVING (= (TOK_TABLE_OR_COL key) 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a-subquery1:b-subquery1:c + TableScan + alias: c + Filter Operator + predicate: + expr: (key = 1) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: c1 + type: int + outputColumnNames: _col0, _col1 + Union + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col1 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + tag: -1 + a-subquery2:b-subquery2:d + TableScan + alias: d + Filter Operator + predicate: + expr: (key = 1) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: c1 + type: int + outputColumnNames: _col0, _col1 + Union + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col1 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM dest1 GROUP BY key,c1,c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-11_867_1296408301198800349/-mr-10000 +POSTHOOK: query: SELECT * FROM dest1 GROUP BY key,c1,c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-11_867_1296408301198800349/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION 
[(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +0 1 00.0 +1 71 132828.0 +2 69 251142.0 +3 62 364008.0 +4 74 4105526.0 +5 6 5794.0 +6 5 6796.0 +7 6 71470.0 +8 8 81524.0 +9 7 92094.0 +PREHOOK: query: SELECT * FROM dest1 ali GROUP BY ali.key,ali.c1,ali.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-16_214_6286466986811826370/-mr-10000 +POSTHOOK: query: SELECT * FROM dest1 ali GROUP BY ali.key,ali.c1,ali.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-16_214_6286466986811826370/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +0 1 00.0 +1 71 132828.0 +2 69 251142.0 +3 62 364008.0 +4 74 4105526.0 +5 6 5794.0 +6 5 6796.0 +7 6 71470.0 +8 8 81524.0 +9 7 92094.0 +PREHOOK: query: select * from dest1 x join dest1 y on (x.key=y.key) group by x.key,x.c1,x.c2,y.key,y.c1,y.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-20_355_6745525753368458740/-mr-10000 +POSTHOOK: query: select * from dest1 x join dest1 y on (x.key=y.key) group by x.key,x.c1,x.c2,y.key,y.c1,y.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-20_355_6745525753368458740/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +0 1 00.0 0 1 00.0 +1 71 132828.0 1 71 132828.0 +2 69 251142.0 2 69 251142.0 +3 62 364008.0 3 62 364008.0 +4 74 4105526.0 4 74 4105526.0 +5 6 5794.0 5 6 5794.0 +6 5 6796.0 6 5 6796.0 +7 6 71470.0 7 6 71470.0 +8 8 81524.0 8 8 81524.0 +9 7 92094.0 9 7 92094.0 +PREHOOK: query: select * from (select key,c1 from dest1) x group by key,c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-28_505_6023795571848972153/-mr-10000 +POSTHOOK: query: select * from (select key,c1 from dest1) x group by key,c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-28_505_6023795571848972153/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +0 1 +1 71 +2 69 +3 62 +4 74 +5 6 +6 5 +7 6 +8 8 +9 7 +PREHOOK: query: select 
* from (select key, c1 from (select key, c1 from dest1 c union all select key, c1 from dest1 d) b) a group by key, c1 having key=1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-32_577_3465763044021066735/-mr-10000 +POSTHOOK: query: select * from (select key, c1 from (select key, c1 from dest1 c union all select key, c1 from dest1 d) b) a group by key, c1 having key=1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/tmp/root/hive_2011-11-22_11-22-32_577_3465763044021066735/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +1 71