diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 422c6b2..6bf8219 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -406,6 +406,7 @@ public class HiveConf extends Configuration {
     HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5),
     HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true),
     HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false),
+    HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false),

     // for hive udtf operator
     HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index f7c2812..d4b7a2c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -302,6 +302,12 @@ public enum ErrorMsg {
       "ALTER VIEW AS SELECT if the view has paritions\n"),
   EXISTING_TABLE_IS_NOT_VIEW(10218,
       "Existing table is not a view\n"),
+  NO_SUPPORTED_ORDERBY_ALLCOLREF_POS(10219,
+      "Position in ORDER BY is not supported when using SELECT *"),
+  INVALID_POSITION_ALIAS_IN_GROUPBY(10220,
+      "Invalid position alias in Group By\n"),
+  INVALID_POSITION_ALIAS_IN_ORDERBY(10221,
+      "Invalid position alias in Order By\n"),
   SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
   SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
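For reference, each of the three new error codes corresponds to a query shape that the negative tests later in this patch exercise. A minimal HiveQL sketch against the standard src test table (key STRING, value STRING), with the new flag enabled:

  set hive.groupby.orderby.position.alias=true;
  SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY 3;  -- Error 10220: only positions 1..2 exist
  SELECT src.key, src.value FROM src ORDER BY 0;                 -- Error 10221: positions are indexed from 1
  SELECT src.* FROM src ORDER BY 1;                              -- Error 10219: positions cannot be resolved under SELECT *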
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 4fd0d90..2b64333 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -33,6 +33,7 @@ import java.util.TreeSet;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;

+import org.antlr.runtime.tree.BaseTree;
 import org.antlr.runtime.tree.Tree;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.ContentSummary;
@@ -8200,6 +8201,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {

     LOG.info("Starting Semantic Analysis");

+    // analyze and process the position aliases
+    processPositionAlias(ast);
+
     // analyze create table command
     if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) {
       // if it is not CTAS, we don't need to go further and just return
@@ -9134,6 +9138,98 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
   }

+  // Process position aliases in GROUP BY and ORDER BY.
+  private void processPositionAlias(ASTNode ast) throws SemanticException {
+    if (!HiveConf.getBoolVar(conf,
+        HiveConf.ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS)) {
+      return;
+    }
+
+    if (ast.getChildCount() == 0) {
+      return;
+    }
+
+    boolean isAllCol;
+    ASTNode selectNode = null;
+    ASTNode groupbyNode = null;
+    ASTNode orderbyNode = null;
+
+    // locate the SELECT, GROUP BY, and ORDER BY nodes among the children
+    int child_count = ast.getChildCount();
+    for (int child_pos = 0; child_pos < child_count; ++child_pos) {
+      ASTNode node = (ASTNode) ast.getChild(child_pos);
+      int type = node.getToken().getType();
+      if (type == HiveParser.TOK_SELECT) {
+        selectNode = node;
+      } else if (type == HiveParser.TOK_GROUPBY) {
+        groupbyNode = node;
+      } else if (type == HiveParser.TOK_ORDERBY) {
+        orderbyNode = node;
+      }
+    }
+
+    if (selectNode != null) {
+      int selectExpCnt = selectNode.getChildCount();
+
+      // replace each position alias in GROUP BY with the corresponding select expression
+      if (groupbyNode != null) {
+        for (int child_pos = 0; child_pos < groupbyNode.getChildCount(); ++child_pos) {
+          ASTNode node = (ASTNode) groupbyNode.getChild(child_pos);
+          if (node.getToken().getType() == HiveParser.Number) {
+            int pos = Integer.parseInt(node.getText());
+            if (pos > 0 && pos <= selectExpCnt) {
+              groupbyNode.setChild(child_pos,
+                (BaseTree) selectNode.getChild(pos - 1).getChild(0));
+            } else {
+              throw new SemanticException(
+                ErrorMsg.INVALID_POSITION_ALIAS_IN_GROUPBY.getMsg(
+                "Position alias: " + pos + " does not exist\n" +
+                "The Select List is indexed from 1 to " + selectExpCnt));
+            }
+          }
+        }
+      }
+
+      // replace each position alias in ORDER BY with the corresponding select expression
+      if (orderbyNode != null) {
+        isAllCol = false;
+        for (int child_pos = 0; child_pos < selectNode.getChildCount(); ++child_pos) {
+          ASTNode node = (ASTNode) selectNode.getChild(child_pos).getChild(0);
+          if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+            isAllCol = true;
+          }
+        }
+        for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) {
+          ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos);
+          ASTNode node = (ASTNode) colNode.getChild(0);
+          if (node.getToken().getType() == HiveParser.Number) {
+            if (!isAllCol) {
+              int pos = Integer.parseInt(node.getText());
+              if (pos > 0 && pos <= selectExpCnt) {
+                colNode.setChild(0, (BaseTree) selectNode.getChild(pos - 1).getChild(0));
+              } else {
+                throw new SemanticException(
+                  ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg(
+                  "Position alias: " + pos + " does not exist\n" +
+                  "The Select List is indexed from 1 to " + selectExpCnt));
+              }
+            } else {
+              throw new SemanticException(
+                ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg());
+            }
+          }
+        }
+      }
+    }
+
+    // recursively process the children ASTNodes
+    for (int child_pos = 0; child_pos < child_count; ++child_pos) {
+      processPositionAlias((ASTNode) ast.getChild(child_pos));
+    }
+    return;
+  }
+
   public QB getQB() {
     return qb;
   }
diff --git ql/src/test/queries/clientnegative/groupby_invalid_position.q ql/src/test/queries/clientnegative/groupby_invalid_position.q
new file mode 100644
index 0000000..173a752
--- /dev/null
+++ ql/src/test/queries/clientnegative/groupby_invalid_position.q
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- invalid position alias in group by
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY 3;
diff --git ql/src/test/queries/clientnegative/orderby_invalid_position.q ql/src/test/queries/clientnegative/orderby_invalid_position.q
new file mode 100644
index 0000000..4dbf2a6
--- /dev/null
+++ ql/src/test/queries/clientnegative/orderby_invalid_position.q
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- invalid position alias in order by
+SELECT src.key, src.value FROM src ORDER BY 0;
diff --git ql/src/test/queries/clientnegative/orderby_position_unsupported.q ql/src/test/queries/clientnegative/orderby_position_unsupported.q
new file mode 100644
index 0000000..a490c23
--- /dev/null
+++ ql/src/test/queries/clientnegative/orderby_position_unsupported.q
@@ -0,0 +1,4 @@
+set hive.groupby.orderby.position.alias=true;
+
+-- position alias is not supported when using SELECT *
+SELECT src.* FROM src ORDER BY 1;
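Before the positive test file, a brief sketch of the transform itself. With the flag on, processPositionAlias runs before semantic analysis and replaces each numeric token under TOK_GROUPBY or TOK_ORDERBY with the corresponding select expression, so the two statements below produce the same AST (the EXPLAIN output later in this patch shows GROUP BY 1 resolving to (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))):

  set hive.groupby.orderby.position.alias=true;
  -- written with a position alias
  SELECT src.key, COUNT(DISTINCT SUBSTR(src.value,5)) FROM src GROUP BY 1;
  -- what the analyzer effectively rewrites it to
  SELECT src.key, COUNT(DISTINCT SUBSTR(src.value,5)) FROM src GROUP BY src.key;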
diff --git ql/src/test/queries/clientpositive/groupby_position.q ql/src/test/queries/clientpositive/groupby_position.q
new file mode 100644
index 0000000..b4bb82a
--- /dev/null
+++ ql/src/test/queries/clientpositive/groupby_position.q
@@ -0,0 +1,53 @@
+set hive.groupby.orderby.position.alias=true;
+
+CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE;
+CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2;
+
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2;
+
+SELECT key, value FROM testTable1 ORDER BY 1, 2;
+SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1;
+
+FROM SRC
+INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1;
+
+SELECT key, value FROM testTable1 ORDER BY 1, 2;
+SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3;
+
+set hive.map.aggr=false;
+set hive.groupby.skewindata=true;
+
+CREATE TABLE testPartTable1(key STRING, val1 INT, val2 INT) partitioned by (ds string);
+CREATE TABLE testPartTable2(key STRING, val1 INT, val2 INT) partitioned by (ds string);
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE testPartTable1 partition(ds='111')
+  SELECT src.value, count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testPartTable2 partition(ds='111')
+  SELECT substr(src.value, 5), count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1;
+
+FROM src
+INSERT OVERWRITE TABLE testPartTable1 partition(ds='111')
+  SELECT src.value, count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1
+INSERT OVERWRITE TABLE testPartTable2 partition(ds='111')
+  SELECT substr(src.value, 5), count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1;
+
+SELECT key, val1, val2 from testPartTable1 ORDER BY 2 DESC, 1 ASC, 3 DESC;
+SELECT key, val1, val2 from testPartTable2 ORDER BY 3 DESC, 2 DESC, 1 DESC;
+
+
+
diff --git ql/src/test/results/clientnegative/groupby_invalid_position.q.out ql/src/test/results/clientnegative/groupby_invalid_position.q.out
new file mode 100644
index 0000000..1652238
--- /dev/null
+++ ql/src/test/results/clientnegative/groupby_invalid_position.q.out
@@ -0,0 +1,3 @@
+FAILED: SemanticException [Error 10220]: Invalid position alias in Group By
+ Position alias: 3 does not exist
+The Select List is indexed from 1 to 2
diff --git ql/src/test/results/clientnegative/orderby_invalid_position.q.out ql/src/test/results/clientnegative/orderby_invalid_position.q.out
new file mode 100644
index 0000000..95c8fa7
--- /dev/null
+++ ql/src/test/results/clientnegative/orderby_invalid_position.q.out
@@ -0,0 +1,3 @@
+FAILED: SemanticException [Error 10221]: Invalid position alias in Order By
+ Position alias: 0 does not exist
+The Select List is indexed from 1 to 2
diff --git
ql/src/test/results/clientnegative/orderby_position_unsupported.q.out ql/src/test/results/clientnegative/orderby_position_unsupported.q.out new file mode 100644 index 0000000..56d3240 --- /dev/null +++ ql/src/test/results/clientnegative/orderby_position_unsupported.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10219]: Position in ORDER BY is not supported when using SELECT * diff --git ql/src/test/results/clientpositive/groupby_position.q.out ql/src/test/results/clientpositive/groupby_position.q.out new file mode 100644 index 0000000..ced89fa --- /dev/null +++ ql/src/test/results/clientpositive/groupby_position.q.out @@ -0,0 +1,1021 @@ +PREHOOK: query: CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@testTable1 +PREHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@testTable2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. 
(TOK_TABLE_OR_COL SRC) value)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 20.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Filter Operator + predicate: + expr: (key < 20.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: 
_col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@testtable1 +PREHOOK: Output: default@testtable2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@testtable1 +POSTHOOK: Output: default@testtable2 +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@testtable1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testtable1 +#### A masked pattern was here #### +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 1 +2 1 +4 1 +5 1 +8 1 +9 1 +10 1 +11 1 +12 1 +15 1 +17 1 +18 1 +19 1 +PREHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@testtable2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testtable2 +#### A masked pattern was here #### +POSTHOOK: Lineage: testtable1.key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 1 +2 val_2 1 +4 val_4 1 +5 val_5 1 +8 val_8 1 +9 val_9 1 +10 val_10 1 +11 val_11 1 +12 val_12 1 +15 val_15 1 +17 val_17 1 +18 val_18 1 +19 val_19 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testTable2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) value) (. 
(TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 20.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Filter Operator + predicate: + expr: (key < 20.0) + type: boolean + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: value + type: string + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col0 + type: string + expr: _col2 
+ type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@testtable1 +PREHOOK: Output: default@testtable2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@testtable1 +POSTHOOK: Output: default@testtable2 +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@testtable1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testtable1 +#### A masked pattern was here #### +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
+POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 1 +2 1 +4 1 +5 1 +8 1 +9 1 +10 1 +11 1 +12 1 +15 1 +17 1 +18 1 +19 1 +PREHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@testtable2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testtable2 +#### A masked pattern was here #### +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 1 +2 val_2 1 +4 val_4 1 +5 val_5 1 +8 val_8 1 +9 val_9 1 +10 val_10 1 +11 val_11 1 +12 val_12 1 +15 val_15 1 +17 val_17 1 +18 val_18 1 +19 val_19 1 +PREHOOK: query: CREATE TABLE testPartTable1(key STRING, val1 INT, val2 INT) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE testPartTable1(key STRING, val1 INT, val2 INT) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@testPartTable1 +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE testPartTable2(key STRING, val1 INT, val2 INT) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE testPartTable2(key STRING, val1 INT, val2 INT) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@testPartTable2 +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE testPartTable1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testPartTable2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE testPartTable1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testPartTable2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX 
TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testPartTable1) (TOK_PARTSPEC (TOK_PARTVAL ds '111')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME testPartTable2) (TOK_PARTSPEC (TOK_PARTVAL ds '111')))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL SRC) key) 20)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 20.0) + type: boolean + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + Reduce Output Operator + key expressions: + expr: value + type: string + expr: key + type: string + sort order: ++ + Map-reduce partition columns: + expr: value + type: string + expr: key + type: string + tag: -1 + Filter Operator + predicate: + expr: (key < 20.0) + type: boolean + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(KEY._col1:0._col0) + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + 
GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testparttable1 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 111 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testparttable1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: substr(value, 5) + type: string + expr: key + type: string + sort order: ++ + Map-reduce partition columns: + expr: substr(value, 5) + type: string + expr: key + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(KEY._col1:0._col0) + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testparttable2 + + Stage: Stage-1 + Move Operator + tables: + partition: + ds 111 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testparttable2 + + Stage: Stage-7 + Stats-Aggr Operator + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE testPartTable1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testPartTable2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@testparttable1@ds=111 +PREHOOK: Output: default@testparttable2@ds=111 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE testPartTable1 partition(ds='111') + SELECT 
src.value, count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testPartTable2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) WHERE SRC.key < 20 GROUP BY 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@testparttable1@ds=111 +POSTHOOK: Output: default@testparttable2@ds=111 +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT key, val1, val2 from testPartTable1 ORDER BY 2 DESC, 1 ASC, 3 DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@testparttable1@ds=111 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val1, val2 from testPartTable1 ORDER BY 2 DESC, 1 ASC, 3 DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testparttable1@ds=111 +#### A masked pattern was here #### +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_0 3 1 +val_5 3 1 +val_12 2 1 +val_15 2 1 +val_18 2 1 +val_10 1 1 +val_11 1 1 +val_17 1 1 +val_19 1 1 +val_2 1 1 +val_4 1 1 +val_8 1 1 +val_9 1 1 +PREHOOK: query: SELECT key, val1, val2 from testPartTable2 ORDER BY 3 DESC, 2 DESC, 1 DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@testparttable2@ds=111 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val1, val2 from testPartTable2 ORDER BY 3 DESC, 2 DESC, 1 DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testparttable2@ds=111 +#### A masked pattern was here #### +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable1 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testparttable2 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +5 3 1 +0 3 1 +18 2 1 +15 2 1 +12 2 1 +9 1 1 +8 1 1 +4 1 1 +2 1 1 +19 1 1 +17 1 1 +11 1 1 +10 1 1
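To close, a hedged end-to-end usage sketch. The flag defaults to false (per the HiveConf change above), so existing queries are unaffected until a session opts in:

  -- opt in per session
  set hive.groupby.orderby.position.alias=true;

  -- 1 and 2 refer to key and value in select-list order; ASC/DESC modifiers apply per position
  SELECT key, value FROM src GROUP BY 1, 2 ORDER BY 2 DESC, 1 ASC;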