diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java
@@ -20,11 +20,15 @@ package org.apache.hadoop.hive.ql.udf.generic;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.TaskExecutionException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -34,10 +38,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
  *
  */
 @Description(name = "explode",
-    value = "_FUNC_(a) - separates the elements of array a into multiple rows ")
+    value = "_FUNC_(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns ")
 public class GenericUDTFExplode extends GenericUDTF {
 
-  private ListObjectInspector listOI = null;
+  private ObjectInspector inputOI = null;
 
   @Override
   public void close() throws HiveException {
@@ -49,30 +53,61 @@ public class GenericUDTFExplode extends GenericUDTF {
       throw new UDFArgumentException("explode() takes only one argument");
     }
-    if (args[0].getCategory() != ObjectInspector.Category.LIST) {
-      throw new UDFArgumentException("explode() takes an array as a parameter");
-    }
-    listOI = (ListObjectInspector) args[0];
-
     ArrayList<String> fieldNames = new ArrayList<String>();
     ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
-    fieldNames.add("col");
-    fieldOIs.add(listOI.getListElementObjectInspector());
+
+    switch (args[0].getCategory()) {
+    case LIST:
+      inputOI = args[0];
+      fieldNames.add("col");
+      fieldOIs.add(((ListObjectInspector) inputOI).getListElementObjectInspector());
+      break;
+    case MAP:
+      inputOI = args[0];
+      fieldNames.add("key");
+      fieldNames.add("value");
+      fieldOIs.add(((MapObjectInspector) inputOI).getMapKeyObjectInspector());
+      fieldOIs.add(((MapObjectInspector) inputOI).getMapValueObjectInspector());
+      break;
+    default:
+      throw new UDFArgumentException("explode() takes an array or a map as a parameter");
+    }
+
     return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
         fieldOIs);
   }
 
-  private final Object[] forwardObj = new Object[1];
+  private final Object[] forwardListObj = new Object[1];
+  private final Object[] forwardMapObj = new Object[2];
 
   @Override
   public void process(Object[] o) throws HiveException {
-    List<?> list = listOI.getList(o[0]);
-    if(list == null) {
-      return;
-    }
-    for (Object r : list) {
-      forwardObj[0] = r;
-      forward(forwardObj);
+    switch (inputOI.getCategory()) {
+    case LIST:
+      ListObjectInspector listOI = (ListObjectInspector) inputOI;
+      List<?> list = listOI.getList(o[0]);
+      // Preserve the old behavior for NULL arrays: emit no rows instead of
+      // failing when assertions are disabled.
+      if (list == null) {
+        return;
+      }
+      for (Object r : list) {
+        forwardListObj[0] = r;
+        forward(forwardListObj);
+      }
+      break;
+    case MAP:
+      MapObjectInspector mapOI = (MapObjectInspector) inputOI;
+      Map<?, ?> map = mapOI.getMap(o[0]);
+      // Same guard for NULL maps: emit no rows.
+      if (map == null) {
+        return;
+      }
+      for (Entry<?, ?> r : map.entrySet()) {
+        forwardMapObj[0] = r.getKey();
+        forwardMapObj[1] = r.getValue();
+        forward(forwardMapObj);
+      }
+      break;
+    default:
+      throw new TaskExecutionException("explode() can only operate on an array or a map");
    }
   }
 
 }
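With the MAP branch above, explode() now emits one two-column row per map entry, and initialize() names the output columns key and value when the query supplies no aliases. A quick sketch of the new usage (hypothetical ad-hoc queries, not part of the checked-in tests below):

    SELECT explode(map(1,'one',2,'two')) AS (k, v) FROM src LIMIT 2;
    -- expected output, one row per map entry:
    -- 1	one
    -- 2	two
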
diff --git ql/src/test/queries/clientnegative/udtf_explode_not_supported1.q ql/src/test/queries/clientnegative/udtf_explode_not_supported1.q
--- /dev/null
+++ ql/src/test/queries/clientnegative/udtf_explode_not_supported1.q
@@ -0,0 +1 @@
+SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src GROUP BY key;
diff --git ql/src/test/queries/clientnegative/udtf_explode_not_supported2.q ql/src/test/queries/clientnegative/udtf_explode_not_supported2.q
--- /dev/null
+++ ql/src/test/queries/clientnegative/udtf_explode_not_supported2.q
@@ -0,0 +1 @@
+SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal,myVal2) FROM src;
\ No newline at end of file
diff --git ql/src/test/queries/clientnegative/udtf_explode_not_supported3.q ql/src/test/queries/clientnegative/udtf_explode_not_supported3.q
--- /dev/null
+++ ql/src/test/queries/clientnegative/udtf_explode_not_supported3.q
@@ -0,0 +1 @@
+select explode(array(1),array(2)) as myCol from src;
\ No newline at end of file
diff --git ql/src/test/queries/clientnegative/udtf_explode_not_supported4.q ql/src/test/queries/clientnegative/udtf_explode_not_supported4.q
--- /dev/null
+++ ql/src/test/queries/clientnegative/udtf_explode_not_supported4.q
@@ -0,0 +1 @@
+SELECT explode(null) as myNull FROM src GROUP BY key;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/udf_explode.q ql/src/test/queries/clientpositive/udf_explode.q
--- ql/src/test/queries/clientpositive/udf_explode.q
+++ ql/src/test/queries/clientpositive/udf_explode.q
@@ -7,3 +7,9 @@ EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS
 SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3;
 SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3;
 SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol;
+
+EXPLAIN EXTENDED SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3;
+EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val;
+
+SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3;
+SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/udtf_explode.q ql/src/test/queries/clientpositive/udtf_explode.q
--- ql/src/test/queries/clientpositive/udtf_explode.q
+++ ql/src/test/queries/clientpositive/udtf_explode.q
@@ -7,3 +7,9 @@ EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS
 SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3;
 SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3;
 SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol;
+
+EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3;
+EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal;
+
+SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3;
+SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal;
\ No newline at end of file
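The not_supported1 and not_supported4 cases pin down that a UDTF in the SELECT clause cannot be combined with GROUP BY; the supported pattern, which the positive tests above exercise, is to aggregate over a subquery that does the exploding. A minimal sketch of that rewrite (illustrative only, not a checked-in test):

    -- rejected: SELECT explode(map(...)) as (myKey,myVal) FROM src GROUP BY key;
    -- accepted rewrite, aggregating over an exploding subquery:
    SELECT t.myKey, count(1)
    FROM (SELECT explode(map(1,'one',2,'two')) AS (myKey, myVal) FROM src) t
    GROUP BY t.myKey;
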
diff --git ql/src/test/results/clientnegative/udtf_explode_not_supported1.q.out ql/src/test/results/clientnegative/udtf_explode_not_supported1.q.out
--- /dev/null
+++ ql/src/test/results/clientnegative/udtf_explode_not_supported1.q.out
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: GROUP BY is not supported with a UDTF in the SELECT clause
diff --git ql/src/test/results/clientnegative/udtf_explode_not_supported2.q.out ql/src/test/results/clientnegative/udtf_explode_not_supported2.q.out
--- /dev/null
+++ ql/src/test/results/clientnegative/udtf_explode_not_supported2.q.out
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: The number of aliases supplied in the AS clause does not match the number of columns output by the UDTF expected 2 aliases but got 3
diff --git ql/src/test/results/clientnegative/udtf_explode_not_supported3.q.out ql/src/test/results/clientnegative/udtf_explode_not_supported3.q.out
--- /dev/null
+++ ql/src/test/results/clientnegative/udtf_explode_not_supported3.q.out
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: explode() takes only one argument
diff --git ql/src/test/results/clientnegative/udtf_explode_not_supported4.q.out ql/src/test/results/clientnegative/udtf_explode_not_supported4.q.out
--- /dev/null
+++ ql/src/test/results/clientnegative/udtf_explode_not_supported4.q.out
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: GROUP BY is not supported with a UDTF in the SELECT clause
diff --git ql/src/test/results/clientpositive/udf_explode.q.out ql/src/test/results/clientpositive/udf_explode.q.out
--- ql/src/test/results/clientpositive/udf_explode.q.out
+++ ql/src/test/results/clientpositive/udf_explode.q.out
@@ -2,12 +2,12 @@ PREHOOK: query: DESCRIBE FUNCTION explode
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION explode
 POSTHOOK: type: DESCFUNCTION
-explode(a) - separates the elements of array a into multiple rows
+explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
 PREHOOK: query: DESCRIBE FUNCTION EXTENDED explode
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED explode
 POSTHOOK: type: DESCFUNCTION
-explode(a) - separates the elements of array a into multiple rows
+explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
 PREHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
@@ -38,9 +38,9 @@ STAGE PLANS:
                   File Output Operator
                     compressed: false
                     GlobalTableId: 0
-                    directory: file:/tmp/nzhang/hive_2010-09-15_17-19-40_025_3910076199083461129/-ext-10001
+                    directory: file:/tmp/natty/hive_2011-01-23_22-40-34_058_7723086660741484859/-ext-10001
                     NumFilesPerFileSink: 1
-                    Stats Publishing Key Prefix: file:/tmp/nzhang/hive_2010-09-15_17-19-40_025_3910076199083461129/-ext-10001/
+                    Stats Publishing Key Prefix: file:/tmp/natty/hive_2011-01-23_22-40-34_058_7723086660741484859/-ext-10001/
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -53,9 +53,9 @@ STAGE PLANS:
                     MultiFileSpray: false
      Needs Tagging: false
      Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [src]
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src [src]
      Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
          Partition
            base file name: src
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -66,12 +66,12 @@ STAGE PLANS:
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284588338
+              transient_lastDdlTime 1295851232
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -82,12 +82,12 @@ STAGE PLANS:
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284588338
+              transient_lastDdlTime 1295851232
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          name: src
        name: src
@@ -133,9 +133,9 @@ STAGE PLANS:
                    type: int
      Needs Tagging: false
      Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [a:src]
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src [a:src]
      Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
          Partition
            base file name: src
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -146,12 +146,12 @@ STAGE PLANS:
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284588338
+              transient_lastDdlTime 1295851232
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -162,12 +162,12 @@ STAGE PLANS:
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284588338
+              transient_lastDdlTime 1295851232
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          name: src
        name: src
@@ -191,7 +191,7 @@ STAGE PLANS:
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
-                  directory: file:/tmp/nzhang/hive_2010-09-15_17-19-40_065_141080764987446855/-mr-10002
+                  directory: file:/tmp/natty/hive_2011-01-23_22-40-34_232_6578856327974225797/-mr-10002
                  NumFilesPerFileSink: 1
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -207,7 +207,7 @@ STAGE PLANS:
  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-15_17-19-40_065_141080764987446855/-mr-10002 
+        file:/tmp/natty/hive_2011-01-23_22-40-34_232_6578856327974225797/-mr-10002 
          Reduce Output Operator
            key expressions:
                  expr: _col0
@@ -222,9 +222,9 @@ STAGE PLANS:
                  type: bigint
      Needs Tagging: false
      Path -> Alias:
-        file:/tmp/nzhang/hive_2010-09-15_17-19-40_065_141080764987446855/-mr-10002 [file:/tmp/nzhang/hive_2010-09-15_17-19-40_065_141080764987446855/-mr-10002]
+        file:/tmp/natty/hive_2011-01-23_22-40-34_232_6578856327974225797/-mr-10002 [file:/tmp/natty/hive_2011-01-23_22-40-34_232_6578856327974225797/-mr-10002]
      Path -> Partition:
-        file:/tmp/nzhang/hive_2010-09-15_17-19-40_065_141080764987446855/-mr-10002 
+        file:/tmp/natty/hive_2011-01-23_22-40-34_232_6578856327974225797/-mr-10002 
          Partition
            base file name: -mr-10002
            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -260,9 +260,9 @@ STAGE PLANS:
            File Output Operator
              compressed: false
              GlobalTableId: 0
-              directory: file:/tmp/nzhang/hive_2010-09-15_17-19-40_065_141080764987446855/-ext-10001
+              directory: file:/tmp/natty/hive_2011-01-23_22-40-34_232_6578856327974225797/-ext-10001
              NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: file:/tmp/nzhang/hive_2010-09-15_17-19-40_065_141080764987446855/-ext-10001/
+              Stats Publishing Key Prefix: file:/tmp/natty/hive_2011-01-23_22-40-34_232_6578856327974225797/-ext-10001/
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -282,33 +282,340 @@ STAGE PLANS:
 PREHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-19-40_117_2393476748831762851/-mr-10000
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-34_327_8296904487280447331/-mr-10000
 POSTHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-19-40_117_2393476748831762851/-mr-10000
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-34_327_8296904487280447331/-mr-10000
 1
 2
 3
 PREHOOK: query: SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-20-00_614_4063766451500768976/-mr-10000
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-37_723_5094728457793614732/-mr-10000
 POSTHOOK: query: SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-20-00_614_4063766451500768976/-mr-10000
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-37_723_5094728457793614732/-mr-10000
 1
 2
 3
 PREHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-20-08_183_2692769490322479197/-mr-10000
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-40_683_4143751800776011244/-mr-10000
 POSTHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-20-08_183_2692769490322479197/-mr-10000
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-40_683_4143751800776011244/-mr-10000
 1	1
 2	1
 3	1
+PREHOOK: query: EXPLAIN EXTENDED SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) key val)) (TOK_LIMIT 3)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: map(1:'one',2:'two',3:'three')
+                    type: map<int,string>
+              outputColumnNames: _col0
+              UDTF Operator
+                function name: explode
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    directory: file:/tmp/natty/hive_2011-01-23_22-40-46_712_384648983380705168/-ext-10001
+                    NumFilesPerFileSink: 1
+                    Stats Publishing Key Prefix: file:/tmp/natty/hive_2011-01-23_22-40-46_712_384648983380705168/-ext-10001/
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns key,value
+                          columns.types int:string
+                          serialization.format 1
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src [src]
+      Path -> Partition:
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src 
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
+              name src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1295851232
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
+              name src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1295851232
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: src
+        name: src
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) key val)) (TOK_LIMIT 3))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) val))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a:src 
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: map(1:'one',2:'two',3:'three')
+                    type: map<int,string>
+              outputColumnNames: _col0
+              UDTF Operator
+                function name: explode
+                Limit
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions:
+                          expr: key
+                          type: int
+                          expr: value
+                          type: string
+      Needs Tagging: false
+      Path -> Alias:
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src [a:src]
+      Path -> Partition:
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src 
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
+              name src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1295851232
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
+              name src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1295851232
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: src
+        name: src
+      Reduce Operator Tree:
+        Extract
+          Limit
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  directory: file:/tmp/natty/hive_2011-01-23_22-40-46_772_8882433807997451677/-mr-10002
+                  NumFilesPerFileSink: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types int,string,bigint
+                        escape.delim \
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/natty/hive_2011-01-23_22-40-46_772_8882433807997451677/-mr-10002 
+          Reduce Output Operator
+            key expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+            sort order: ++
+            Map-reduce partition columns:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+            tag: -1
+            value expressions:
+                  expr: _col2
+                  type: bigint
+      Needs Tagging: false
+      Path -> Alias:
+        file:/tmp/natty/hive_2011-01-23_22-40-46_772_8882433807997451677/-mr-10002 [file:/tmp/natty/hive_2011-01-23_22-40-46_772_8882433807997451677/-mr-10002]
+      Path -> Partition:
+        file:/tmp/natty/hive_2011-01-23_22-40-46_772_8882433807997451677/-mr-10002 
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2
+              columns.types int,string,bigint
+              escape.delim \
+
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2
+              columns.types int,string,bigint
+              escape.delim \
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: int
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              directory: file:/tmp/natty/hive_2011-01-23_22-40-46_772_8882433807997451677/-ext-10001
+              NumFilesPerFileSink: 1
+              Stats Publishing Key Prefix: file:/tmp/natty/hive_2011-01-23_22-40-46_772_8882433807997451677/-ext-10001/
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types int:string:bigint
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-46_869_2795585690599571198/-mr-10000
+POSTHOOK: query: SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-46_869_2795585690599571198/-mr-10000
+1	one
+2	two
+3	three
+PREHOOK: query: SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-50_063_1577268674562251221/-mr-10000
+POSTHOOK: query: SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-23_22-40-50_063_1577268674562251221/-mr-10000
+1	one	1
+2	two	1
+3	three	1
diff --git ql/src/test/results/clientpositive/udtf_explode.q.out ql/src/test/results/clientpositive/udtf_explode.q.out
--- ql/src/test/results/clientpositive/udtf_explode.q.out
+++ ql/src/test/results/clientpositive/udtf_explode.q.out
@@ -2,12 +2,12 @@ PREHOOK: query: DESCRIBE FUNCTION explode
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION explode
 POSTHOOK: type: DESCFUNCTION
-explode(a) - separates the elements of array a into multiple rows
+explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
 PREHOOK: query: DESCRIBE FUNCTION EXTENDED explode
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED explode
 POSTHOOK: type: DESCFUNCTION
-explode(a) - separates the elements of array a into multiple rows
+explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
 PREHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
@@ -38,9 +38,9 @@ STAGE PLANS:
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
-                    directory: file:/tmp/nzhang/hive_2010-09-15_17-34-12_472_461078458690479492/-ext-10001
+                    directory: file:/tmp/natty/hive_2011-01-24_23-03-26_947_2394833512942025207/-ext-10001
                    NumFilesPerFileSink: 1
-                    Stats Publishing Key Prefix: file:/tmp/nzhang/hive_2010-09-15_17-34-12_472_461078458690479492/-ext-10001/
+                    Stats Publishing Key Prefix: file:/tmp/natty/hive_2011-01-24_23-03-26_947_2394833512942025207/-ext-10001/
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -53,9 +53,9 @@ STAGE PLANS:
                    MultiFileSpray: false
      Needs Tagging: false
      Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [src]
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src [src]
      Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
          Partition
            base file name: src
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -66,12 +66,12 @@ STAGE PLANS:
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284588338
+              transient_lastDdlTime 1295939004
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -82,12 +82,12 @@ STAGE PLANS:
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284588338
+              transient_lastDdlTime 1295939004
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          name: src
        name: src
@@ -133,9 +133,9 @@ STAGE PLANS:
                    type: int
      Needs Tagging: false
      Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [a:src]
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src [a:src]
      Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
          Partition
            base file name: src
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -146,12 +146,12 @@ STAGE PLANS:
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284588338
+              transient_lastDdlTime 1295939004
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -162,12 +162,12 @@ STAGE PLANS:
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284588338
+              transient_lastDdlTime 1295939004
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          name: src
        name: src
@@ -191,7 +191,7 @@ STAGE PLANS:
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
-                  directory: file:/tmp/nzhang/hive_2010-09-15_17-34-12_511_3699113220853694956/-mr-10002
+                  directory: file:/tmp/natty/hive_2011-01-24_23-03-27_165_6752148407305222329/-mr-10002
                  NumFilesPerFileSink: 1
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -207,7 +207,7 @@ STAGE PLANS:
  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-15_17-34-12_511_3699113220853694956/-mr-10002 
+        file:/tmp/natty/hive_2011-01-24_23-03-27_165_6752148407305222329/-mr-10002 
          Reduce Output Operator
            key expressions:
                  expr: _col0
@@ -222,9 +222,9 @@ STAGE PLANS:
                  type: bigint
      Needs Tagging: false
      Path -> Alias:
-        file:/tmp/nzhang/hive_2010-09-15_17-34-12_511_3699113220853694956/-mr-10002 [file:/tmp/nzhang/hive_2010-09-15_17-34-12_511_3699113220853694956/-mr-10002]
+        file:/tmp/natty/hive_2011-01-24_23-03-27_165_6752148407305222329/-mr-10002 [file:/tmp/natty/hive_2011-01-24_23-03-27_165_6752148407305222329/-mr-10002]
      Path -> Partition:
-        file:/tmp/nzhang/hive_2010-09-15_17-34-12_511_3699113220853694956/-mr-10002 
+        file:/tmp/natty/hive_2011-01-24_23-03-27_165_6752148407305222329/-mr-10002 
          Partition
            base file name: -mr-10002
            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -260,9 +260,9 @@ STAGE PLANS:
            File Output Operator
              compressed: false
              GlobalTableId: 0
-              directory: file:/tmp/nzhang/hive_2010-09-15_17-34-12_511_3699113220853694956/-ext-10001
+              directory: file:/tmp/natty/hive_2011-01-24_23-03-27_165_6752148407305222329/-ext-10001
              NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: file:/tmp/nzhang/hive_2010-09-15_17-34-12_511_3699113220853694956/-ext-10001/
+              Stats Publishing Key Prefix: file:/tmp/natty/hive_2011-01-24_23-03-27_165_6752148407305222329/-ext-10001/
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -282,33 +282,289 @@ STAGE PLANS:
 PREHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-34-12_563_1523905309826369917/-mr-10000
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-27_265_8817855433973576509/-mr-10000
 POSTHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-34-12_563_1523905309826369917/-mr-10000
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-27_265_8817855433973576509/-mr-10000
 1
 2
 3
 PREHOOK: query: SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-34-16_104_3527117414764259158/-mr-10000
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-31_158_6187739418666993884/-mr-10000
 POSTHOOK: query: SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-34-16_104_3527117414764259158/-mr-10000
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-31_158_6187739418666993884/-mr-10000
 1
 2
 3
 PREHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-34-19_634_506308003652349415/-mr-10000
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-34_240_4984041270492279200/-mr-10000
 POSTHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-15_17-34-19_634_506308003652349415/-mr-10000
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-34_240_4984041270492279200/-mr-10000
 1	1
 2	1
 3	1
+PREHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) myKey myVal)) (TOK_LIMIT 3)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: map(1:'one',2:'two',3:'three')
+                    type: map<int,string>
+              outputColumnNames: _col0
+              UDTF Operator
+                function name: explode
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) myKey myVal)) (TOK_LIMIT 3))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) myKey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) myVal)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) myKey) (. (TOK_TABLE_OR_COL a) myVal))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a:src 
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: map(1:'one',2:'two',3:'three')
+                    type: map<int,string>
+              outputColumnNames: _col0
+              UDTF Operator
+                function name: explode
+                Limit
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions:
+                          expr: key
+                          type: int
+                          expr: value
+                          type: string
+      Needs Tagging: false
+      Path -> Alias:
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src [a:src]
+      Path -> Partition:
+        pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src 
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
+              name src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1295939004
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/home/natty/apache/hive/build/ql/test/data/warehouse/src
+              name src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1295939004
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: src
+        name: src
+      Reduce Operator Tree:
+        Extract
+          Limit
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  directory: file:/tmp/natty/hive_2011-01-24_23-03-40_490_471322677945824981/-mr-10002
+                  NumFilesPerFileSink: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types int,string,bigint
+                        escape.delim \
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/natty/hive_2011-01-24_23-03-40_490_471322677945824981/-mr-10002 
+          Reduce Output Operator
+            key expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+            sort order: ++
+            Map-reduce partition columns:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+            tag: -1
+            value expressions:
+                  expr: _col2
+                  type: bigint
+      Needs Tagging: false
+      Path -> Alias:
+        file:/tmp/natty/hive_2011-01-24_23-03-40_490_471322677945824981/-mr-10002 [file:/tmp/natty/hive_2011-01-24_23-03-40_490_471322677945824981/-mr-10002]
+      Path -> Partition:
+        file:/tmp/natty/hive_2011-01-24_23-03-40_490_471322677945824981/-mr-10002 
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2
+              columns.types int,string,bigint
+              escape.delim \
+
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2
+              columns.types int,string,bigint
+              escape.delim \
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: int
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              directory: file:/tmp/natty/hive_2011-01-24_23-03-40_490_471322677945824981/-ext-10001
+              NumFilesPerFileSink: 1
+              Stats Publishing Key Prefix: file:/tmp/natty/hive_2011-01-24_23-03-40_490_471322677945824981/-ext-10001/
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types int:string:bigint
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-40_598_7919385398480016176/-mr-10000
+POSTHOOK: query: SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-40_598_7919385398480016176/-mr-10000
+1	one
+2	two
+3	three
+PREHOOK: query: SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-43_559_8982597379255990012/-mr-10000
+POSTHOOK: query: SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/natty/hive_2011-01-24_23-03-43_559_8982597379255990012/-mr-10000
+1	one	1
+2	two	1
+3	three	1
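The q-files above only exercise explode() as the sole SELECT expression; the same MAP branch should also be reachable through LATERAL VIEW, which joins each exploded key/value pair back to the source row. A hedged sketch of that usage (assumed to work with multi-column UDTF aliases; not covered by this patch's tests):

    SELECT src.key, t.mapKey, t.mapVal
    FROM src
    LATERAL VIEW explode(map(1,'one',2,'two')) t AS mapKey, mapVal
    LIMIT 2;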