diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
index 908546e..1c5c7a9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
@@ -96,8 +96,17 @@ public void put(String tab_alias, String col_alias, ColumnInfo colInfo) {
     if (rowSchema.getSignature() == null) {
       rowSchema.setSignature(new ArrayList<ColumnInfo>());
     }
-
-    rowSchema.getSignature().add(colInfo);
+
+    /*
+     * allow multiple mappings to the same ColumnInfo.
+     * When a ColumnInfo is mapped multiple times, only the
+     * first inverse mapping is captured.
+     */
+    boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName());
+
+    if ( !colPresent ) {
+      rowSchema.getSignature().add(colInfo);
+    }
 
     LinkedHashMap<String, ColumnInfo> f_map = rslvMap.get(tab_alias);
     if (f_map == null) {
@@ -106,10 +115,12 @@ public void put(String tab_alias, String col_alias, ColumnInfo colInfo) {
     }
     f_map.put(col_alias, colInfo);
 
-    String[] qualifiedAlias = new String[2];
-    qualifiedAlias[0] = tab_alias;
-    qualifiedAlias[1] = col_alias;
-    invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
+    if ( !colPresent ) {
+      String[] qualifiedAlias = new String[2];
+      qualifiedAlias[0] = tab_alias;
+      qualifiedAlias[1] = col_alias;
+      invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
+    }
   }
 
   public boolean hasTableAlias(String tab_alias) {
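
A minimal sketch of the put() contract established above (hypothetical driver code, not part of the patch; assumes imports of RowResolver, ColumnInfo and TypeInfoFactory, and the aliases and internal name are illustrative):

    // Two mappings to the same ColumnInfo: the schema and the inverse map
    // record the column only once, under the first (tab_alias, col_alias) pair.
    RowResolver rr = new RowResolver();
    ColumnInfo ci = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "b", false);
    rr.put("b", "key", ci);   // first mapping: enters the schema and invRslvMap
    rr.put("", "key", ci);    // alternate mapping: resolvable, but not re-added
    assert rr.getRowSchema().getSignature().size() == 1;
    assert "b".equals(rr.reverseLookup("_col0")[0]);
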
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 67ad6cb..7a67c5b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2021,6 +2021,12 @@ private Integer genColListRegex(String colRegex, String tabAlias,
     if (!aliases.contains("")) {
       aliases.add("");
     }
+    /*
+     * track the input ColumnInfos that are added to the output.
+     * if a columnInfo has multiple mappings, then add the column only once,
+     * but carry the mappings forward.
+     */
+    Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
     // For expr "*", aliases should be iterated in the order they are specified
     // in the query.
     for (String alias : aliases) {
@@ -2049,16 +2055,21 @@ private Integer genColListRegex(String colRegex, String tabAlias,
           continue;
         }
 
-        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
-            name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
         if (subQuery) {
           output.checkColumn(tmp[0], tmp[1]);
         }
-        col_list.add(expr);
-        output.put(tmp[0], tmp[1],
-            new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
-                colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
-                colInfo.isHiddenVirtualCol()));
+        ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
+        if (oColInfo == null) {
+          ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
+              name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
+              colInfo.isSkewedCol());
+          col_list.add(expr);
+          oColInfo = new ColumnInfo(getColumnInternalName(pos),
+              colInfo.getType(), colInfo.getTabAlias(),
+              colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+          inputColsProcessed.put(colInfo, oColInfo);
+        }
+        output.put(tmp[0], tmp[1], oColInfo);
         pos = Integer.valueOf(pos.intValue() + 1);
         matched++;
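
The map-based dedup introduced in the hunk above, shown in isolation. A hedged sketch only: Mapping, mappings, and makeOutputColumn are hypothetical stand-ins for the surrounding genColListRegex bookkeeping.

    // Emit each distinct input column once; every alias that reaches it
    // shares the single output ColumnInfo.
    Map<ColumnInfo, ColumnInfo> processed = new HashMap<ColumnInfo, ColumnInfo>();
    for (Mapping m : mappings) {                 // (tab_alias, col_alias) -> input column
      ColumnInfo out = processed.get(m.colInfo);
      if (out == null) {                         // first time this input column is seen
        out = makeOutputColumn(m.colInfo);       // allocate _colN, add to col_list
        processed.put(m.colInfo, out);
      }
      output.put(m.tabAlias, m.colAlias, out);   // every alias still resolves
    }
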
@@ -3114,8 +3125,10 @@ private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
           .getInternalName(), "", false));
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);
+      ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
       groupByOutputRowResolver.putExpression(grpbyExpr,
-          new ColumnInfo(field, exprInfo.getType(), null, false));
+          oColInfo);
+      addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
       colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
     }
     // For each aggregation
@@ -3323,8 +3336,10 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
           .getIsVirtualCol()));
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);
+      ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
       groupByOutputRowResolver.putExpression(grpbyExpr,
-          new ColumnInfo(field, exprInfo.getType(), "", false));
+          oColInfo);
+      addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
       colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
     }
 
@@ -4105,8 +4120,10 @@ private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo,
           exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);
+      ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
       groupByOutputRowResolver2.putExpression(grpbyExpr,
-          new ColumnInfo(field, exprInfo.getType(), "", false));
+          oColInfo);
+      addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
       colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
     }
 
@@ -10741,4 +10758,40 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec,
     return selSpec;
   }
 
+  private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
+      Operator<? extends OperatorDesc> reduceSinkOp, RowResolver gByRR) {
+    if ( gByExpr.getType() == HiveParser.DOT
+        && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL ) {
+      String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr
+          .getChild(0).getChild(0).getText());
+      String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(
+          gByExpr.getChild(1).getText());
+      gByRR.put(tab_alias, col_alias, colInfo);
+    } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) {
+      String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr
+          .getChild(0).getText());
+      String tab_alias = null;
+      /*
+       * If the input to the GBy has a tab alias for the column, then add an entry
+       * based on that tab_alias.
+       * For e.g. this query:
+       * select b.x, count(*) from t1 b group by x
+       * needs (tab_alias=b, col_alias=x) in the GBy RR.
+       * tab_alias=b comes from looking at the RowResolver that is the ancestor
+       * before any GBy/ReduceSinks added for the GBY operation.
+       */
+      Operator<? extends OperatorDesc> parent = reduceSinkOp;
+      while ( parent instanceof ReduceSinkOperator ||
+          parent instanceof GroupByOperator ) {
+        parent = parent.getParentOperators().get(0);
+      }
+      RowResolver parentRR = opParseCtx.get(parent).getRowResolver();
+      try {
+        ColumnInfo pColInfo = parentRR.get(tab_alias, col_alias);
+        tab_alias = pColInfo == null ? null : pColInfo.getTabAlias();
+      } catch (SemanticException se) {
+      }
+      gByRR.put(tab_alias, col_alias, colInfo);
+    }
+  }
 }
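
Taken together with the RowResolver change, the effect for the query in the comment above can be sketched as follows (hypothetical driver code, not part of the patch; gbyKeyAst stands in for the ASTNode of the GBy key x, and RowResolver.get declares SemanticException):

    // select b.x, count(*) from t1 b group by x
    RowResolver gByRR = new RowResolver();
    ColumnInfo key = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, null, false);
    gByRR.putExpression(gbyKeyAst, key);  // mapping for the unqualified "group by x"
    gByRR.put("b", "x", key);             // alternate mapping for "select b.x"
    // Both spellings now resolve to the same output column, which is what
    // lets "select b.key, count(*) from src b group by key" compile below.
    assert gByRR.get("b", "x") == key;
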
diff --git ql/src/test/queries/clientnegative/notable_alias3.q ql/src/test/queries/clientnegative/notable_alias3.q
deleted file mode 100644
index 6cc3e87..0000000
--- ql/src/test/queries/clientnegative/notable_alias3.q
+++ /dev/null
@@ -1,4 +0,0 @@
-CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;
-
-FROM src
-INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key;
diff --git ql/src/test/queries/clientpositive/groupby_resolution.q ql/src/test/queries/clientpositive/groupby_resolution.q
new file mode 100644
index 0000000..a1fc18d
--- /dev/null
+++ ql/src/test/queries/clientpositive/groupby_resolution.q
@@ -0,0 +1,21 @@
+
+
+set hive.map.aggr=false;
+set hive.groupby.skewindata=false;
+explain select key, count(*) from src b group by b.key;
+explain select b.key, count(*) from src b group by key;
+
+set hive.map.aggr=false;
+set hive.groupby.skewindata=true;
+explain select key, count(*) from src b group by b.key;
+explain select b.key, count(*) from src b group by key;
+
+set hive.map.aggr=true;
+set hive.groupby.skewindata=false;
+explain select key, count(*) from src b group by b.key;
+explain select b.key, count(*) from src b group by key;
+
+set hive.map.aggr=true;
+set hive.groupby.skewindata=true;
+explain select key, count(*) from src b group by b.key;
+explain select b.key, count(*) from src b group by key;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/notable_alias3.q ql/src/test/queries/clientpositive/notable_alias3.q
new file mode 100644
index 0000000..aa79674
--- /dev/null
+++ ql/src/test/queries/clientpositive/notable_alias3.q
@@ -0,0 +1,4 @@
+CREATE TABLE dest1(c string, key INT, value DOUBLE) STORED AS TEXTFILE;
+
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key;
\ No newline at end of file
diff --git ql/src/test/results/clientnegative/notable_alias3.q.out ql/src/test/results/clientnegative/notable_alias3.q.out
deleted file mode 100644
index cadca6e..0000000
--- ql/src/test/results/clientnegative/notable_alias3.q.out
+++ /dev/null
@@ -1,6 +0,0 @@
-PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: default@dest1
-FAILED: SemanticException [Error 10025]: Line 4:44 Expression not in GROUP BY key 'key'
diff --git ql/src/test/results/clientpositive/groupby_resolution.q.out ql/src/test/results/clientpositive/groupby_resolution.q.out
new file mode 100644
index 0000000..c426540
--- /dev/null
+++ ql/src/test/results/clientpositive/groupby_resolution.q.out
@@ -0,0 +1,688 @@
+PREHOOK: query: explain select key, count(*) from src b group by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(*) from src b group by b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count()
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select b.key, count(*) from src b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b.key, count(*) from src b group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count()
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select key, count(*) from src b group by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(*) from src b group by b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: rand()
+                      type: double
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count()
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select b.key, count(*) from src b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b.key, count(*) from src b group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: rand()
+                      type: double
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count()
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select key, count(*) from src b group by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(*) from src b group by b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select b.key, count(*) from src b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b.key, count(*) from src b group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select key, count(*) from src b group by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(*) from src b group by b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: rand()
+                        type: double
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partials
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select b.key, count(*) from src b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b.key, count(*) from src b group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: rand()
+                        type: double
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partials
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
diff --git ql/src/test/results/clientpositive/notable_alias3.q.out ql/src/test/results/clientpositive/notable_alias3.q.out
new file mode 100644
index 0000000..ab809a4
--- /dev/null
+++ ql/src/test/results/clientpositive/notable_alias3.q.out
@@ -0,0 +1,18 @@
+PREHOOK: query: CREATE TABLE dest1(c string, key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(c string, key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c SIMPLE []
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
diff --git ql/src/test/results/compiler/errors/nonkey_groupby.q.out ql/src/test/results/compiler/errors/nonkey_groupby.q.out
index a13d45d..9dfbb97 100644
--- ql/src/test/results/compiler/errors/nonkey_groupby.q.out
+++ ql/src/test/results/compiler/errors/nonkey_groupby.q.out
@@ -1,2 +1,2 @@
 Semantic Exception: 
-Line 2:44 Expression not in GROUP BY key 'value'
\ No newline at end of file
+Line 2:48 Invalid column reference 'value'
\ No newline at end of file
diff --git ql/src/test/results/compiler/plan/groupby1.q.xml ql/src/test/results/compiler/plan/groupby1.q.xml
index 485c323..a8ec2af 100755
--- ql/src/test/results/compiler/plan/groupby1.q.xml
+++ ql/src/test/results/compiler/plan/groupby1.q.xml
@@ -1408,6 +1408,9 @@
                   <void property="internalName"> 
                    <string>_col0</string> 
                   </void> 
+                  <void property="tabAlias"> 
+                   <string>src</string> 
+                  </void> 
                  </object> 
                 </void> 
                </void> 
diff --git ql/src/test/results/compiler/plan/groupby5.q.xml ql/src/test/results/compiler/plan/groupby5.q.xml
index abdbff0..c53af00 100644
--- ql/src/test/results/compiler/plan/groupby5.q.xml
+++ ql/src/test/results/compiler/plan/groupby5.q.xml
@@ -1281,6 +1281,9 @@
                   <void property="internalName"> 
                    <string>_col0</string> 
                   </void> 
+                  <void property="tabAlias"> 
+                   <string>src</string> 
+                  </void> 
                  </object> 
                 </void> 
                </void> 