diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
index 7a7f3ef..f011258 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
@@ -935,6 +935,11 @@ protected RowResolver buildRowResolverForWindowing(WindowTableFunctionDef def)
       } else {
         rr.put(cInfo.getTabAlias(), colAlias, cInfo);
       }
+
+      String[] altMapping = inputRR.getAlternateMappings(inpCInfo.getInternalName());
+      if ( altMapping != null ) {
+        rr.put(altMapping[0], altMapping[1], cInfo);
+      }
     }
 
     return rr;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
index 908546e..f142f3e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
@@ -43,6 +43,13 @@
   private HashMap<String, LinkedHashMap<String, ColumnInfo>> rslvMap;
 
   private HashMap<String, String[]> invRslvMap;
+  /*
+   * Now a column can have an alternate mapping.
+   * This captures the alternate mapping.
+   * The primary (first) mapping is still only held in
+   * invRslvMap.
+   */
+  private Map<String, String[]> altInvRslvMap;
   private Map<String, ASTNode> expressionMap;
 
   // TODO: Refactor this and do in a more object oriented manner
@@ -55,6 +62,7 @@ public RowResolver() {
     rowSchema = new RowSchema();
     rslvMap = new HashMap<String, LinkedHashMap<String, ColumnInfo>>();
     invRslvMap = new HashMap<String, String[]>();
+    altInvRslvMap = new HashMap<String, String[]>();
     expressionMap = new HashMap<String, ASTNode>();
     isExprResolver = false;
   }
@@ -96,8 +104,17 @@ public void put(String tab_alias, String col_alias, ColumnInfo colInfo) {
     if (rowSchema.getSignature() == null) {
       rowSchema.setSignature(new ArrayList<ColumnInfo>());
     }
-
-    rowSchema.getSignature().add(colInfo);
+
+    /*
+     * Allow multiple mappings to the same ColumnInfo.
+     * When a ColumnInfo is mapped multiple times, only the
+     * first inverse mapping is captured.
+     */
+    boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName());
+
+    if ( !colPresent ) {
+      rowSchema.getSignature().add(colInfo);
+    }
 
     LinkedHashMap<String, ColumnInfo> f_map = rslvMap.get(tab_alias);
     if (f_map == null) {
@@ -109,7 +126,11 @@ public void put(String tab_alias, String col_alias, ColumnInfo colInfo) {
     String[] qualifiedAlias = new String[2];
     qualifiedAlias[0] = tab_alias;
     qualifiedAlias[1] = col_alias;
-    invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
+    if ( !colPresent ) {
+      invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
+    } else {
+      altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
+    }
   }
 
   public boolean hasTableAlias(String tab_alias) {
@@ -149,14 +170,21 @@ public ColumnInfo get(String tab_alias, String col_alias) throws SemanticException {
       ret = f_map.get(col_alias);
     } else {
       boolean found = false;
-      for (LinkedHashMap<String, ColumnInfo> cmap : rslvMap.values()) {
+      String foundTbl = null;
+      for (Map.Entry<String, LinkedHashMap<String, ColumnInfo>> rslvEntry : rslvMap.entrySet()) {
+        String rslvKey = rslvEntry.getKey();
+        LinkedHashMap<String, ColumnInfo> cmap = rslvEntry.getValue();
         for (Map.Entry<String, ColumnInfo> cmapEnt : cmap.entrySet()) {
           if (col_alias.equalsIgnoreCase(cmapEnt.getKey())) {
-            if (found) {
+            /*
+             * We can have an unaliased and one aliased mapping to a Column.
+             */
+            if (found && foundTbl != null && rslvKey != null) {
               throw new SemanticException("Column " + col_alias
                   + " Found in more than One Tables/Subqueries");
             }
             found = true;
+            foundTbl = rslvKey == null ? foundTbl : rslvKey;
             ret = cmapEnt.getValue();
           }
         }
@@ -260,6 +288,10 @@ public void setIsExprResolver(boolean isExprResolver) {
   public boolean getIsExprResolver() {
     return isExprResolver;
   }
+
+  public String[] getAlternateMappings(String internalName) {
+    return altInvRslvMap.get(internalName);
+  }
 
   @Override
   public String toString() {
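To make the new RowResolver semantics concrete: when the same ColumnInfo is registered twice, the first (tab_alias, col_alias) pair stays the primary mapping and the second becomes retrievable via getAlternateMappings(), while get() now tolerates one aliased plus one unaliased hit on the same column. A minimal sketch, assuming a Hive 0.11-era classpath; the wrapper class and the column values are hypothetical, not part of the patch:

    import org.apache.hadoop.hive.ql.exec.ColumnInfo;
    import org.apache.hadoop.hive.ql.parse.RowResolver;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class AltMappingSketch {
      public static void main(String[] args) throws Exception {
        RowResolver rr = new RowResolver();
        ColumnInfo key = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "b", false);

        rr.put("b", "key", key);    // first mapping: lands in invRslvMap and the row schema
        rr.put(null, "key", key);   // same ColumnInfo again: recorded only in altInvRslvMap

        String[] primary = rr.reverseLookup("_col0");     // {"b", "key"} -- unchanged
        String[] alt = rr.getAlternateMappings("_col0");  // {null, "key"} -- new accessor

        // get() now allows one aliased and one unaliased mapping to the same
        // column, so neither lookup raises the
        // "Found in more than One Tables/Subqueries" error:
        ColumnInfo viaAlias = rr.get("b", "key");
        ColumnInfo unaliased = rr.get(null, "key");
      }
    }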
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d797407..7979873 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2084,6 +2084,12 @@ private Integer genColListRegex(String colRegex, String tabAlias,
     if (!aliases.contains("")) {
       aliases.add("");
     }
+    /*
+     * Track the input ColumnInfos that are added to the output.
+     * If a ColumnInfo has multiple mappings, then add the column only once,
+     * but carry the mappings forward.
+     */
+    Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
     // For expr "*", aliases should be iterated in the order they are specified
     // in the query.
     for (String alias : aliases) {
@@ -2112,16 +2118,21 @@
           continue;
         }
 
-        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
-            name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
         if (subQuery) {
           output.checkColumn(tmp[0], tmp[1]);
         }
-        col_list.add(expr);
-        output.put(tmp[0], tmp[1],
-            new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
-                colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
-                colInfo.isHiddenVirtualCol()));
+        ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
+        if (oColInfo == null) {
+          ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
+              name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
+              colInfo.isSkewedCol());
+          col_list.add(expr);
+          oColInfo = new ColumnInfo(getColumnInternalName(pos),
+              colInfo.getType(), colInfo.getTabAlias(),
+              colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+          inputColsProcessed.put(colInfo, oColInfo);
+        }
+        output.put(tmp[0], tmp[1], oColInfo);
         pos = Integer.valueOf(pos.intValue() + 1);
         matched++;
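The genColListRegex hunk is dense, so here is the shape of the new de-duplication logic in isolation. This is a sketch fragment only: AliasedCol, resolvedAliases, exprFor, and outputColumnFor are hypothetical stand-ins for the analyzer's real data structures and lookups.

    // For "*" expansion: when one input ColumnInfo is reachable through several
    // (tabAlias, colAlias) pairs, emit its ExprNodeColumnDesc once, but register
    // every alias against the single output ColumnInfo.
    Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
    for (AliasedCol a : resolvedAliases) {            // hypothetical iteration
      ColumnInfo oColInfo = inputColsProcessed.get(a.colInfo);
      if (oColInfo == null) {                         // first sighting of this column
        colList.add(exprFor(a.colInfo));              // one expression per physical column
        oColInfo = outputColumnFor(a.colInfo);        // builds the _colN ColumnInfo
        inputColsProcessed.put(a.colInfo, oColInfo);
      }
      output.put(a.tabAlias, a.colAlias, oColInfo);   // alias still lands in the output RR
    }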
@@ -2916,6 +2927,14 @@ private static boolean isRegex(String pattern) {
       colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp)
           .isSkewedCol() : false);
       out_rwsch.put(tabAlias, colAlias, colInfo);
+
+      if ( exp instanceof ExprNodeColumnDesc ) {
+        ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
+        String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
+        if ( altMapping != null ) {
+          out_rwsch.put(altMapping[0], altMapping[1], colInfo);
+        }
+      }
 
       pos = Integer.valueOf(pos.intValue() + 1);
     }
@@ -3177,8 +3196,10 @@ private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
           .getInternalName(), "", false));
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);
+      ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
       groupByOutputRowResolver.putExpression(grpbyExpr,
-          new ColumnInfo(field, exprInfo.getType(), null, false));
+          oColInfo);
+      addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
       colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
     }
     // For each aggregation
@@ -3386,8 +3407,10 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
           .getIsVirtualCol()));
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);
+      ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
       groupByOutputRowResolver.putExpression(grpbyExpr,
-          new ColumnInfo(field, exprInfo.getType(), "", false));
+          oColInfo);
+      addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
       colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
     }
@@ -4168,8 +4191,10 @@ private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo,
           exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);
+      ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
       groupByOutputRowResolver2.putExpression(grpbyExpr,
-          new ColumnInfo(field, exprInfo.getType(), "", false));
+          oColInfo);
+      addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
       colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
     }
@@ -10702,7 +10727,10 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec,
           outColName, colInfo.getType(), alias[0],
           colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
       rsNewRR.put(alias[0], alias[1], newColInfo);
-
+      String[] altMapping = inputRR.getAlternateMappings(colInfo.getInternalName());
+      if ( altMapping != null ) {
+        rsNewRR.put(altMapping[0], altMapping[1], newColInfo);
+      }
     }
 
     input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
@@ -10748,6 +10776,10 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec,
           colsAddedByHaving.put(alias, eColInfo);
         }
       }
+      String[] altMapping = inputRR.getAlternateMappings(colInfo.getInternalName());
+      if ( altMapping != null ) {
+        extractRR.put(altMapping[0], altMapping[1], eColInfo);
+      }
     }
 
     for (Map.Entry columnAddedByHaving : colsAddedByHaving.entrySet()) {
@@ -10816,4 +10848,40 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec,
     return selSpec;
   }
 
+  private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
+      Operator<? extends OperatorDesc> reduceSinkOp, RowResolver gByRR) {
+    if ( gByExpr.getType() == HiveParser.DOT
+        && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL ) {
+      String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr
+          .getChild(0).getChild(0).getText());
+      String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(
+          gByExpr.getChild(1).getText());
+      gByRR.put(tab_alias, col_alias, colInfo);
+    } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) {
+      String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr
+          .getChild(0).getText());
+      String tab_alias = null;
+      /*
+       * If the input to the GBy has a tab alias for the column, then add an entry
+       * based on that tab_alias.
+       * For example, this query:
+       *   select b.x, count(*) from t1 b group by x
+       * needs (tab_alias=b, col_alias=x) in the GBy RR.
+       * tab_alias=b comes from looking at the RowResolver that is the ancestor
+       * before any GBy/ReduceSinks added for the GBY operation.
+       */
+      Operator<? extends OperatorDesc> parent = reduceSinkOp;
+      while ( parent instanceof ReduceSinkOperator ||
+          parent instanceof GroupByOperator ) {
+        parent = parent.getParentOperators().get(0);
+      }
+      RowResolver parentRR = opParseCtx.get(parent).getRowResolver();
+      try {
+        ColumnInfo pColInfo = parentRR.get(tab_alias, col_alias);
+        tab_alias = pColInfo == null ? null : pColInfo.getTabAlias();
+      } catch(SemanticException se) {
+      }
+      gByRR.put(tab_alias, col_alias, colInfo);
+    }
+  }
 }
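End to end, addAlternateGByKeyMappings is what lets a group-by key and a select column disagree about the table alias. A minimal sketch, assuming a Hive 0.11-era build with a metastore that already has the src table (the wrapper class is hypothetical); before this patch both compile() calls failed semantic analysis ("Expression not in GROUP BY key" or the ambiguity error), and with the patch both should return 0:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.Driver;
    import org.apache.hadoop.hive.ql.session.SessionState;

    public class GByAliasSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        SessionState.start(conf);
        Driver driver = new Driver(conf);

        // unaliased select column, aliased group-by key
        int rc1 = driver.compile("select key, count(*) from src b group by b.key");
        // aliased select column, unaliased group-by key
        int rc2 = driver.compile("select b.key, count(*) from src b group by key");

        System.out.println(rc1 + ", " + rc2);  // expect "0, 0" with the patch applied
      }
    }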
diff --git ql/src/test/queries/clientnegative/clustern1.q ql/src/test/queries/clientnegative/clustern1.q
deleted file mode 100644
index 0ff4477..0000000
--- ql/src/test/queries/clientnegative/clustern1.q
+++ /dev/null
@@ -1,2 +0,0 @@
-EXPLAIN
-SELECT x.key, x.value as key FROM SRC x CLUSTER BY key;
diff --git ql/src/test/queries/clientnegative/notable_alias3.q ql/src/test/queries/clientnegative/notable_alias3.q
deleted file mode 100644
index 6cc3e87..0000000
--- ql/src/test/queries/clientnegative/notable_alias3.q
+++ /dev/null
@@ -1,4 +0,0 @@
-CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;
-
-FROM src
-INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key;
diff --git ql/src/test/queries/clientpositive/groupby_resolution.q ql/src/test/queries/clientpositive/groupby_resolution.q
new file mode 100644
index 0000000..1ec9d7c
--- /dev/null
+++ ql/src/test/queries/clientpositive/groupby_resolution.q
@@ -0,0 +1,58 @@
+
+
+set hive.map.aggr=false;
+set hive.groupby.skewindata=false;
+explain select key, count(*) from src b group by b.key;
+explain select b.key, count(*) from src b group by key;
+
+set hive.map.aggr=false;
+set hive.groupby.skewindata=true;
+explain select key, count(*) from src b group by b.key;
+explain select b.key, count(*) from src b group by key;
+
+set hive.map.aggr=true;
+set hive.groupby.skewindata=false;
+explain select key, count(*) from src b group by b.key;
+explain select b.key, count(*) from src b group by key;
+
+set hive.map.aggr=true;
+set hive.groupby.skewindata=true;
+explain select key, count(*) from src b group by b.key;
+explain select b.key, count(*) from src b group by key;
+
+-- windowing after group by
+select key, count(*), rank() over(order by count(*))
+from src b
+where key < '12'
+group by b.key;
+
+-- having after group by
+select key, count(*)
+from src b
+group by b.key
+having key < '12';
+
+-- having and windowing
+select key, count(*), rank() over(order by count(*))
+from src b
+group by b.key
+having key < '12'
+;
+
+explain
+select key, count(*), rank() over(order by count(*))
+from src b
+group by b.key
+having key < '12'
+;
+
+-- order by
+select key
+from src t
+where key < '12'
+group by t.key
+order by t.key;
+
+-- cluster by
+EXPLAIN
+SELECT x.key, x.value as key FROM SRC x CLUSTER BY key;
\ No newline at end of file
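The new groupby_resolution.q above doubles as documentation: the same two query shapes are compiled under all four combinations of hive.map.aggr and hive.groupby.skewindata, then exercised with windowing, HAVING, ORDER BY, and CLUSTER BY. With the ant-based qfile workflow this tree uses, a run along the lines of "ant test -Dtestcase=TestCliDriver -Dqfile=groupby_resolution.q" (with "-Doverwrite=true" to regenerate golden files) should reproduce the .q.out added below; the exact invocation may differ per checkout.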
diff --git ql/src/test/queries/clientpositive/notable_alias3.q ql/src/test/queries/clientpositive/notable_alias3.q
new file mode 100644
index 0000000..aa79674
--- /dev/null
+++ ql/src/test/queries/clientpositive/notable_alias3.q
@@ -0,0 +1,4 @@
+CREATE TABLE dest1(c string, key INT, value DOUBLE) STORED AS TEXTFILE;
+
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key;
\ No newline at end of file
diff --git ql/src/test/results/clientnegative/clustern1.q.out ql/src/test/results/clientnegative/clustern1.q.out
deleted file mode 100644
index 7c33af4..0000000
--- ql/src/test/results/clientnegative/clustern1.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException Column key Found in more than One Tables/Subqueries
diff --git ql/src/test/results/clientnegative/notable_alias3.q.out ql/src/test/results/clientnegative/notable_alias3.q.out
deleted file mode 100644
index cadca6e..0000000
--- ql/src/test/results/clientnegative/notable_alias3.q.out
+++ /dev/null
@@ -1,6 +0,0 @@
-PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: default@dest1
-FAILED: SemanticException [Error 10025]: Line 4:44 Expression not in GROUP BY key 'key'
diff --git ql/src/test/results/clientpositive/groupby_resolution.q.out ql/src/test/results/clientpositive/groupby_resolution.q.out
new file mode 100644
index 0000000..10a9b77
--- /dev/null
+++ ql/src/test/results/clientpositive/groupby_resolution.q.out
@@ -0,0 +1,1021 @@
+PREHOOK: query: explain select key, count(*) from src b group by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(*) from src b group by b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count()
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select b.key, count(*) from src b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b.key, count(*) from src b group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count()
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select key, count(*) from src b group by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(*) from src b group by b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: rand()
+                      type: double
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count()
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select b.key, count(*) from src b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b.key, count(*) from src b group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: rand()
+                      type: double
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count()
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select key, count(*) from src b group by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(*) from src b group by b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select b.key, count(*) from src b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b.key, count(*) from src b group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select key, count(*) from src b group by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(*) from src b group by b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: rand()
+                        type: double
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partials
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select b.key, count(*) from src b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select b.key, count(*) from src b group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: key
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: rand()
+                        type: double
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partials
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- windowing after group by
+select key, count(*), rank() over(order by count(*))
+from src b
+where key < '12'
+group by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- windowing after group by
+select key, count(*), rank() over(order by count(*))
+from src b
+where key < '12'
+group by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10	1	1
+105	1	1
+11	1	1
+111	1	1
+114	1	1
+116	1	1
+118	2	7
+100	2	7
+103	2	7
+104	2	7
+113	2	7
+0	3	12
+119	3	12
+PREHOOK: query: -- having after group by
+select key, count(*)
+from src b
+group by b.key
+having key < '12'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- having after group by
+select key, count(*)
+from src b
+group by b.key
+having key < '12'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	3
+10	1
+100	2
+103	2
+104	2
+105	1
+11	1
+111	1
+113	2
+114	1
+116	1
+118	2
+119	3
+PREHOOK: query: -- having and windowing
+select key, count(*), rank() over(order by count(*))
+from src b
+group by b.key
+having key < '12'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- having and windowing
+select key, count(*), rank() over(order by count(*))
+from src b
+group by b.key
+having key < '12'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10	1	1
+105	1	1
+11	1	1
+111	1	1
+114	1	1
+116	1	1
+118	2	7
+100	2	7
+103	2	7
+104	2	7
+113	2	7
+0	3	12
+119	3	12
+PREHOOK: query: explain
+select key, count(*), rank() over(order by count(*))
+from src b
+group by b.key
+having key < '12'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, count(*), rank() over(order by count(*))
+from src b
+group by b.key
+having key < '12'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_FUNCTIONSTAR count)))))))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)) (TOK_HAVING (< (TOK_TABLE_OR_COL key) '12'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (key < '12')
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                outputColumnNames: key
+                Group By Operator
+                  aggregations:
+                        expr: count()
+                  bucketGroup: false
+                  keys:
+                        expr: key
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    Map-reduce partition columns:
+                          expr: rand()
+                          type: double
+                    tag: -1
+                    value expressions:
+                          expr: _col1
+                          type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: partials
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: -1
+              value expressions:
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: final
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: 0
+                    type: int
+                    expr: _col1
+                    type: bigint
+              sort order: ++
+              Map-reduce partition columns:
+                    expr: 0
+                    type: int
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+      Reduce Operator Tree:
+        Extract
+          PTF Operator
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+                    expr: _wcol0
+                    type: int
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- order by
+select key
+from src t
+where key < '12'
+group by t.key
+order by t.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- order by
+select key
+from src t
+where key < '12'
+group by t.key
+order by t.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0
+10
+100
+103
+104
+105
+11
+111
+113
+114
+116
+118
+119
+PREHOOK: query: -- cluster by
+EXPLAIN
+SELECT x.key, x.value as key FROM SRC x CLUSTER BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- cluster by
+EXPLAIN
+SELECT x.key, x.value as key FROM SRC x CLUSTER BY key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) key)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        x 
+          TableScan
+            alias: x
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: _col0
+                      type: string
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
diff --git ql/src/test/results/clientpositive/notable_alias3.q.out ql/src/test/results/clientpositive/notable_alias3.q.out
new file mode 100644
index 0000000..ab809a4
--- /dev/null
+++ ql/src/test/results/clientpositive/notable_alias3.q.out
@@ -0,0 +1,18 @@
+PREHOOK: query: CREATE TABLE dest1(c string, key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(c string, key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, sum(src.value) WHERE src.key < 100 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c SIMPLE []
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
diff --git ql/src/test/results/compiler/errors/nonkey_groupby.q.out ql/src/test/results/compiler/errors/nonkey_groupby.q.out
index a13d45d..9dfbb97 100644
--- ql/src/test/results/compiler/errors/nonkey_groupby.q.out
+++ ql/src/test/results/compiler/errors/nonkey_groupby.q.out
@@ -1,2 +1,2 @@
 Semantic Exception:
-Line 2:44 Expression not in GROUP BY key 'value'
\ No newline at end of file
+Line 2:48 Invalid column reference 'value'
\ No newline at end of file
diff --git ql/src/test/results/compiler/plan/groupby1.q.xml ql/src/test/results/compiler/plan/groupby1.q.xml
index 485c323..a8ec2af 100755
--- ql/src/test/results/compiler/plan/groupby1.q.xml
+++ ql/src/test/results/compiler/plan/groupby1.q.xml
@@ -1408,6 +1408,9 @@
          <string>_col0</string>
         </void>
+        <void property="tabAlias">
+         <string>src</string>
+        </void>
diff --git ql/src/test/results/compiler/plan/groupby5.q.xml ql/src/test/results/compiler/plan/groupby5.q.xml
index abdbff0..c53af00 100644
--- ql/src/test/results/compiler/plan/groupby5.q.xml
+++ ql/src/test/results/compiler/plan/groupby5.q.xml
@@ -1281,6 +1281,9 @@
          <string>_col0</string>
         </void>
+        <void property="tabAlias">
+         <string>src</string>
+        </void>