Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java	(revision 1151047)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java	(working copy)
@@ -20,11 +20,15 @@
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.TaskExecutionException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -34,10 +38,11 @@
  *
  */
 @Description(name = "explode",
-    value = "_FUNC_(a) - separates the elements of array a into multiple rows ")
+    value = "_FUNC_(a) - separates the elements of array a into multiple rows,"
+        + " or the elements of a map into multiple rows and columns ")
 public class GenericUDTFExplode extends GenericUDTF {
 
-  private ListObjectInspector listOI = null;
+  private ObjectInspector inputOI = null;
 
   @Override
   public void close() throws HiveException {
@@ -49,31 +54,62 @@
       throw new UDFArgumentException("explode() takes only one argument");
     }
 
-    if (args[0].getCategory() != ObjectInspector.Category.LIST) {
-      throw new UDFArgumentException("explode() takes an array as a parameter");
+    ArrayList<String> fieldNames = new ArrayList<String>();
+    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
+
+    switch (args[0].getCategory()) {
+    case LIST:
+      inputOI = args[0];
+      fieldNames.add("col");
+      fieldOIs.add(((ListObjectInspector)inputOI).getListElementObjectInspector());
+      break;
+    case MAP:
+      inputOI = args[0];
+      fieldNames.add("key");
+      fieldNames.add("value");
+      fieldOIs.add(((MapObjectInspector)inputOI).getMapKeyObjectInspector());
+      fieldOIs.add(((MapObjectInspector)inputOI).getMapValueObjectInspector());
+      break;
+    default:
+      throw new UDFArgumentException("explode() takes an array or a map as a parameter");
     }
-    listOI = (ListObjectInspector) args[0];
 
-    ArrayList<String> fieldNames = new ArrayList<String>();
-    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
-    fieldNames.add("col");
-    fieldOIs.add(listOI.getListElementObjectInspector());
     return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
         fieldOIs);
   }
 
-  private final Object[] forwardObj = new Object[1];
+  private final Object[] forwardListObj = new Object[1];
+  private final Object[] forwardMapObj = new Object[2];
 
   @Override
   public void process(Object[] o) throws HiveException {
-    List<?> list = listOI.getList(o[0]);
-    if (list == null) {
-      return;
+    switch (inputOI.getCategory()) {
+    case LIST:
+      ListObjectInspector listOI = (ListObjectInspector)inputOI;
+      List<?> list = listOI.getList(o[0]);
+      if (list == null) {
+        return;
+      }
+      for (Object r : list) {
+        forwardListObj[0] = r;
+        forward(forwardListObj);
+      }
+      break;
+    case MAP:
+      MapObjectInspector mapOI = (MapObjectInspector)inputOI;
+      Map<?,?> map = mapOI.getMap(o[0]);
+      if (map == null) {
+        return;
+      }
+      for (Entry<?,?> r : map.entrySet()) {
+        forwardMapObj[0] = r.getKey();
+        forwardMapObj[1] = r.getValue();
+        forward(forwardMapObj);
+      }
+      break;
+    default:
+      throw new TaskExecutionException("explode() can only operate on an array or a map");
     }
-    for (Object r : list) {
-      forwardObj[0] = r;
-      forward(forwardObj);
-    }
   }
 
   @Override
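[Reviewer note, not part of the patch] The change above dispatches on the input ObjectInspector's category: LIST keeps the old single-column ("col") behavior, while MAP declares a two-column ("key", "value") schema and forwards one two-element row per map entry. Below is a minimal, hypothetical harness sketching that behavior; the class name ExplodeMapDemo and the driver pattern are invented for illustration, assuming the standard GenericUDTF API (initialize(ObjectInspector[]), setCollector(Collector), process(Object[])) and Hive's standard ObjectInspector factories.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.Collector;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical demo class, not part of the patch.
public class ExplodeMapDemo {
  public static void main(String[] args) throws HiveException {
    GenericUDTFExplode udtf = new GenericUDTFExplode();

    // A map<int,string> ObjectInspector, matching map(1,'one',2,'two',3,'three').
    ObjectInspector mapOI = ObjectInspectorFactory.getStandardMapObjectInspector(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);

    // initialize() takes the new MAP branch and declares a two-column schema;
    // this is where the key/value column names in the golden outputs come from.
    StructObjectInspector rowOI = udtf.initialize(new ObjectInspector[] {mapOI});
    System.out.println(rowOI.getTypeName()); // struct<key:int,value:string>

    // Collect forwarded rows; each row is the Object[] built in process().
    udtf.setCollector(new Collector() {
      public void collect(Object row) throws HiveException {
        Object[] cols = (Object[]) row;
        System.out.println(cols[0] + "\t" + cols[1]); // key, value
      }
    });

    Map<Integer, String> m = new LinkedHashMap<Integer, String>();
    m.put(1, "one");
    m.put(2, "two");
    m.put(3, "three");
    udtf.process(new Object[] {m});    // prints: 1 one / 2 two / 3 three
    udtf.process(new Object[] {null}); // NULL map: nothing is forwarded
    udtf.close();
  }
}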
Index: ql/src/test/queries/clientnegative/udtf_explode_not_supported1.q
===================================================================
--- ql/src/test/queries/clientnegative/udtf_explode_not_supported1.q	(revision 0)
+++ ql/src/test/queries/clientnegative/udtf_explode_not_supported1.q	(revision 0)
@@ -0,0 +1 @@
+SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src GROUP BY key;
Index: ql/src/test/queries/clientnegative/udtf_explode_not_supported2.q
===================================================================
--- ql/src/test/queries/clientnegative/udtf_explode_not_supported2.q	(revision 0)
+++ ql/src/test/queries/clientnegative/udtf_explode_not_supported2.q	(revision 0)
@@ -0,0 +1 @@
+SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal,myVal2) FROM src;
\ No newline at end of file
Index: ql/src/test/queries/clientnegative/udtf_explode_not_supported3.q
===================================================================
--- ql/src/test/queries/clientnegative/udtf_explode_not_supported3.q	(revision 0)
+++ ql/src/test/queries/clientnegative/udtf_explode_not_supported3.q	(revision 0)
@@ -0,0 +1 @@
+select explode(array(1),array(2)) as myCol from src;
\ No newline at end of file
Index: ql/src/test/queries/clientnegative/udtf_explode_not_supported4.q
===================================================================
--- ql/src/test/queries/clientnegative/udtf_explode_not_supported4.q	(revision 0)
+++ ql/src/test/queries/clientnegative/udtf_explode_not_supported4.q	(revision 0)
@@ -0,0 +1 @@
+SELECT explode(null) as myNull FROM src GROUP BY key;
\ No newline at end of file
Index: ql/src/test/queries/clientpositive/explode_null.q
===================================================================
--- ql/src/test/queries/clientpositive/explode_null.q	(revision 1151047)
+++ ql/src/test/queries/clientpositive/explode_null.q	(working copy)
@@ -2,4 +2,9 @@
   (SELECT array(1,2,3) AS col FROM src LIMIT 1
   UNION ALL
   SELECT IF(false, array(1,2,3), NULL) AS col FROM src LIMIT 1) a;
+
+SELECT explode(col) AS (myCol1,myCol2) FROM
+  (SELECT map(1,'one',2,'two',3,'three') AS col FROM src LIMIT 1
+  UNION ALL
+  SELECT IF(false, map(1,'one',2,'two',3,'three'), NULL) AS col FROM src LIMIT 1) a;
\ No newline at end of file
Index: ql/src/test/queries/clientpositive/udf_explode.q
===================================================================
--- ql/src/test/queries/clientpositive/udf_explode.q	(revision 1151047)
+++ ql/src/test/queries/clientpositive/udf_explode.q	(working copy)
@@ -7,3 +7,9 @@
 SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3;
 SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3;
 SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol;
+
+EXPLAIN EXTENDED SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3;
+EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val;
+
+SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3;
+SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val;
\ No newline at end of file
Index: ql/src/test/queries/clientpositive/udtf_explode.q
===================================================================
--- ql/src/test/queries/clientpositive/udtf_explode.q	(revision 1151047)
+++ ql/src/test/queries/clientpositive/udtf_explode.q	(working copy)
@@ -7,3 +7,9 @@
 SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3;
 SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3;
 SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol;
+
+EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3;
+EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal;
+
+SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3;
+SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal;
\ No newline at end of file
Index: ql/src/test/results/clientnegative/udtf_explode_not_supported1.q.out
===================================================================
--- ql/src/test/results/clientnegative/udtf_explode_not_supported1.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/udtf_explode_not_supported1.q.out	(revision 0)
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: GROUP BY is not supported with a UDTF in the SELECT clause
Index: ql/src/test/results/clientnegative/udtf_explode_not_supported2.q.out
===================================================================
--- ql/src/test/results/clientnegative/udtf_explode_not_supported2.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/udtf_explode_not_supported2.q.out	(revision 0)
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: The number of aliases supplied in the AS clause does not match the number of columns output by the UDTF expected 2 aliases but got 3
Index: ql/src/test/results/clientnegative/udtf_explode_not_supported3.q.out
===================================================================
--- ql/src/test/results/clientnegative/udtf_explode_not_supported3.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/udtf_explode_not_supported3.q.out	(revision 0)
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: explode() takes only one argument
Index: ql/src/test/results/clientnegative/udtf_explode_not_supported4.q.out
===================================================================
--- ql/src/test/results/clientnegative/udtf_explode_not_supported4.q.out	(revision 0)
+++ ql/src/test/results/clientnegative/udtf_explode_not_supported4.q.out	(revision 0)
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: GROUP BY is not supported with a UDTF in the SELECT clause
Index: ql/src/test/results/clientpositive/explode_null.q.out
===================================================================
--- ql/src/test/results/clientpositive/explode_null.q.out	(revision 1151047)
+++ ql/src/test/results/clientpositive/explode_null.q.out	(working copy)
@@ -4,14 +4,31 @@
   SELECT IF(false, array(1,2,3), NULL) AS col FROM src LIMIT 1) a
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/pyang/task2/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_15-57-57_440_1997210643218047348/10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-31-35_640_8127575371135726411/-mr-10000
 POSTHOOK: query: SELECT explode(col) AS myCol FROM
   (SELECT array(1,2,3) AS col FROM src LIMIT 1
   UNION ALL
   SELECT IF(false, array(1,2,3), NULL) AS col FROM src LIMIT 1) a
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/pyang/task2/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_15-57-57_440_1997210643218047348/10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-31-35_640_8127575371135726411/-mr-10000
 1
 2
 3
+PREHOOK: query: SELECT explode(col) AS (myCol1,myCol2) FROM
+  (SELECT map(1,'one',2,'two',3,'three') AS col FROM src LIMIT 1
+  UNION ALL
+  SELECT IF(false, map(1,'one',2,'two',3,'three'), NULL) AS col FROM src LIMIT 1) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-31-59_963_8019504615296284121/-mr-10000
+POSTHOOK: query: SELECT explode(col) AS (myCol1,myCol2) FROM
+  (SELECT map(1,'one',2,'two',3,'three') AS col FROM src LIMIT 1
+  UNION ALL
+  SELECT IF(false, map(1,'one',2,'two',3,'three'), NULL) AS col FROM src LIMIT 1) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-31-59_963_8019504615296284121/-mr-10000
+1 one
+2 two
+3 three
Index: ql/src/test/results/clientpositive/udf_explode.q.out
===================================================================
--- ql/src/test/results/clientpositive/udf_explode.q.out	(revision 1151047)
+++ ql/src/test/results/clientpositive/udf_explode.q.out	(working copy)
@@ -2,12 +2,12 @@
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION explode
 POSTHOOK: type: DESCFUNCTION
-explode(a) - separates the elements of array a into multiple rows
+explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
 PREHOOK: query: DESCRIBE FUNCTION EXTENDED explode
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED explode
 POSTHOOK: type: DESCFUNCTION
-explode(a) - separates the elements of array a into multiple rows
+explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
 PREHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
@@ -38,9 +38,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/tmp/sdong/hive_2011-02-10_17-33-22_671_2259921620431089131/-ext-10001
+              directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_027_5358834315394341941/-ext-10001
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: file:/tmp/sdong/hive_2011-02-10_17-33-22_671_2259921620431089131/-ext-10001/
+              Stats Publishing Key Prefix: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_027_5358834315394341941/-ext-10001/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -53,9 +53,9 @@
               MultiFileSpray: false
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src [src]
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src [src]
       Path -> Partition:
-        pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
           Partition
             base file name: src
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -66,12 +66,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+              location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
               name default.src
               serialization.ddl struct src { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1297378968
+              transient_lastDdlTime 1312275423
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -82,12 +82,12 @@
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+                location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
                 name default.src
                 serialization.ddl struct src { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1297378968
+                transient_lastDdlTime 1312275423
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.src
           name: default.src
@@ -133,9 +133,9 @@
                     type: int
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src [a:src]
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src [a:src]
       Path -> Partition:
-        pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
           Partition
             base file name: src
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -146,12 +146,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+              location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
               name default.src
               serialization.ddl struct src { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1297378968
+              transient_lastDdlTime 1312275423
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -162,12 +162,12 @@
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+                location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
                 name default.src
                 serialization.ddl struct src { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1297378968
+                transient_lastDdlTime 1312275423
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.src
           name: default.src
@@ -191,7 +191,7 @@
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/tmp/sdong/hive_2011-02-10_17-33-22_756_4839445772006527677/-mr-10002
+              directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_388_4534643701843903167/-mr-10002
               NumFilesPerFileSink: 1
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -207,7 +207,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/sdong/hive_2011-02-10_17-33-22_756_4839445772006527677/-mr-10002
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_388_4534643701843903167/-mr-10002
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -222,9 +222,9 @@
                   type: bigint
       Needs Tagging: false
       Path -> Alias:
-        file:/tmp/sdong/hive_2011-02-10_17-33-22_756_4839445772006527677/-mr-10002 [file:/tmp/sdong/hive_2011-02-10_17-33-22_756_4839445772006527677/-mr-10002]
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_388_4534643701843903167/-mr-10002 [file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_388_4534643701843903167/-mr-10002]
       Path -> Partition:
-        file:/tmp/sdong/hive_2011-02-10_17-33-22_756_4839445772006527677/-mr-10002
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_388_4534643701843903167/-mr-10002
           Partition
             base file name: -mr-10002
             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -260,9 +260,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/tmp/sdong/hive_2011-02-10_17-33-22_756_4839445772006527677/-ext-10001
+              directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_388_4534643701843903167/-ext-10001
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: file:/tmp/sdong/hive_2011-02-10_17-33-22_756_4839445772006527677/-ext-10001/
+              Stats Publishing Key Prefix: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_388_4534643701843903167/-ext-10001/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -282,33 +282,340 @@
 PREHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-33-22_847_3998623890609022860/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_648_8727187961998229618/-mr-10000
 POSTHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-33-22_847_3998623890609022860/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-08_648_8727187961998229618/-mr-10000
 1
 2
 3
 PREHOOK: query: SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-33-26_152_5865185785205150430/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-16_417_339637955227959213/-mr-10000
 POSTHOOK: query: SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-33-26_152_5865185785205150430/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-16_417_339637955227959213/-mr-10000
 1
 2
 3
 PREHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-33-29_424_2456038912240344372/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-23_795_2244739520484566951/-mr-10000
 POSTHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-33-29_424_2456038912240344372/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-23_795_2244739520484566951/-mr-10000
 1 1
 2 1
 3 1
+PREHOOK: query: EXPLAIN EXTENDED SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) key val)) (TOK_LIMIT 3)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: map(1:'one',2:'two',3:'three')
+                    type: map<int,string>
+              outputColumnNames: _col0
+              UDTF Operator
+                function name: explode
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_559_1879582928207704725/-ext-10001
+                    NumFilesPerFileSink: 1
+                    Stats Publishing Key Prefix: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_559_1879582928207704725/-ext-10001/
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns key,value
+                          columns.types int:string
+                          serialization.format 1
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src [src]
+      Path -> Partition:
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+              name default.src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1312275423
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+                name default.src
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1312275423
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: default.src
+          name: default.src
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) key val)) (TOK_LIMIT 3))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) val))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a:src
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: map(1:'one',2:'two',3:'three')
+                    type: map<int,string>
+              outputColumnNames: _col0
+              UDTF Operator
+                function name: explode
+                Limit
+                  Reduce Output Operator
+                    sort order:
+                    tag: -1
+                    value expressions:
+                          expr: key
+                          type: int
+                          expr: value
+                          type: string
+      Needs Tagging: false
+      Path -> Alias:
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src [a:src]
+      Path -> Partition:
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+              name default.src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1312275423
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+                name default.src
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1312275423
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: default.src
+          name: default.src
+      Reduce Operator Tree:
+        Extract
+          Limit
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_665_6727422705224505334/-mr-10002
+                  NumFilesPerFileSink: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types int,string,bigint
+                        escape.delim \
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_665_6727422705224505334/-mr-10002
+          Reduce Output Operator
+            key expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+            sort order: ++
+            Map-reduce partition columns:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+            tag: -1
+            value expressions:
+                  expr: _col2
+                  type: bigint
+      Needs Tagging: false
+      Path -> Alias:
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_665_6727422705224505334/-mr-10002 [file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_665_6727422705224505334/-mr-10002]
+      Path -> Partition:
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_665_6727422705224505334/-mr-10002
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2
+              columns.types int,string,bigint
+              escape.delim \
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2
+                columns.types int,string,bigint
+                escape.delim \
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: int
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_665_6727422705224505334/-ext-10001
+              NumFilesPerFileSink: 1
+              Stats Publishing Key Prefix: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_665_6727422705224505334/-ext-10001/
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types int:string:bigint
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_827_5146430802966063551/-mr-10000
+POSTHOOK: query: SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-43_827_5146430802966063551/-mr-10000
+1 one
+2 two
+3 three
+PREHOOK: query: SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-52_249_3025251296056353497/-mr-10000
+POSTHOOK: query: SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src LIMIT 3) a GROUP BY a.key, a.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_01-57-52_249_3025251296056353497/-mr-10000
+1 one 1
+2 two 1
+3 three 1
Index: ql/src/test/results/clientpositive/udtf_explode.q.out
===================================================================
--- ql/src/test/results/clientpositive/udtf_explode.q.out	(revision 1151047)
+++ ql/src/test/results/clientpositive/udtf_explode.q.out	(working copy)
@@ -2,12 +2,12 @@
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION explode
 POSTHOOK: type: DESCFUNCTION
-explode(a) - separates the elements of array a into multiple rows
+explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
 PREHOOK: query: DESCRIBE FUNCTION EXTENDED explode
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED explode
 POSTHOOK: type: DESCFUNCTION
-explode(a) - separates the elements of array a into multiple rows
+explode(a) - separates the elements of array a into multiple rows, or the elements of a map into multiple rows and columns
 PREHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
@@ -38,9 +38,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/tmp/sdong/hive_2011-02-10_17-46-44_813_2933283385410388072/-ext-10001
+              directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-29_444_7280637873467578064/-ext-10001
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: file:/tmp/sdong/hive_2011-02-10_17-46-44_813_2933283385410388072/-ext-10001/
+              Stats Publishing Key Prefix: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-29_444_7280637873467578064/-ext-10001/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -53,9 +53,9 @@
               MultiFileSpray: false
       Needs Tagging: false
      Path -> Alias:
-        pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src [src]
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src [src]
       Path -> Partition:
-        pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
           Partition
             base file name: src
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -66,12 +66,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+              location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
               name default.src
               serialization.ddl struct src { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1297378968
+              transient_lastDdlTime 1312276044
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
               input format: org.apache.hadoop.mapred.TextInputFormat
@@ -82,12 +82,12 @@
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+                location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
                 name default.src
                 serialization.ddl struct src { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1297378968
+                transient_lastDdlTime 1312276044
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.src
           name: default.src
@@ -133,9 +133,9 @@
                     type: int
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src [a:src]
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src [a:src]
       Path -> Partition:
-        pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
           Partition
             base file name: src
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -146,12 +146,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+              location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
               name default.src
               serialization.ddl struct src { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1297378968
+              transient_lastDdlTime 1312276044
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
              input format: org.apache.hadoop.mapred.TextInputFormat
@@ -162,12 +162,12 @@
                 columns.types string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/sdong/www/open-source-hive1/build/ql/test/data/warehouse/src
+                location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
                 name default.src
                 serialization.ddl struct src { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1297378968
+                transient_lastDdlTime 1312276044
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.src
           name: default.src
@@ -191,7 +191,7 @@
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/tmp/sdong/hive_2011-02-10_17-46-44_900_3479860791903660768/-mr-10002
+              directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_030_334769724985722712/-mr-10002
               NumFilesPerFileSink: 1
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -207,7 +207,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/sdong/hive_2011-02-10_17-46-44_900_3479860791903660768/-mr-10002
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_030_334769724985722712/-mr-10002
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -222,9 +222,9 @@
                   type: bigint
       Needs Tagging: false
       Path -> Alias:
-        file:/tmp/sdong/hive_2011-02-10_17-46-44_900_3479860791903660768/-mr-10002 [file:/tmp/sdong/hive_2011-02-10_17-46-44_900_3479860791903660768/-mr-10002]
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_030_334769724985722712/-mr-10002 [file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_030_334769724985722712/-mr-10002]
       Path -> Partition:
-        file:/tmp/sdong/hive_2011-02-10_17-46-44_900_3479860791903660768/-mr-10002
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_030_334769724985722712/-mr-10002
           Partition
             base file name: -mr-10002
            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -260,9 +260,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/tmp/sdong/hive_2011-02-10_17-46-44_900_3479860791903660768/-ext-10001
+              directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_030_334769724985722712/-ext-10001
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: file:/tmp/sdong/hive_2011-02-10_17-46-44_900_3479860791903660768/-ext-10001/
+              Stats Publishing Key Prefix: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_030_334769724985722712/-ext-10001/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -282,33 +282,289 @@
 PREHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-46-44_991_4845617813725464653/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_220_7107141868848108363/-mr-10000
 POSTHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-46-44_991_4845617813725464653/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-30_220_7107141868848108363/-mr-10000
 1
 2
 3
 PREHOOK: query: SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-46-48_334_8378511418411346233/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-36_813_1130079024514346887/-mr-10000
 POSTHOOK: query: SELECT explode(array(1,2,3)) AS (myCol) FROM src LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-46-48_334_8378511418411346233/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-36_813_1130079024514346887/-mr-10000
 1
 2
 3
 PREHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-46-51_612_7416766295859987410/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-43_113_588380745988635092/-mr-10000
 POSTHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-46-51_612_7416766295859987410/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-07-43_113_588380745988635092/-mr-10000
 1 1
 2 1
 3 1
+PREHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) myKey myVal)) (TOK_LIMIT 3)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: map(1:'one',2:'two',3:'three')
+                    type: map<int,string>
+              outputColumnNames: _col0
+              UDTF Operator
+                function name: explode
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION map 1 'one' 2 'two' 3 'three')) myKey myVal)) (TOK_LIMIT 3))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) myKey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) myVal)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) myKey) (. (TOK_TABLE_OR_COL a) myVal))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a:src
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: map(1:'one',2:'two',3:'three')
+                    type: map<int,string>
+              outputColumnNames: _col0
+              UDTF Operator
+                function name: explode
+                Limit
+                  Reduce Output Operator
+                    sort order:
+                    tag: -1
+                    value expressions:
+                          expr: key
+                          type: int
+                          expr: value
+                          type: string
+      Needs Tagging: false
+      Path -> Alias:
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src [a:src]
+      Path -> Partition:
+        pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+              name default.src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1312276044
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location pfile:/Users/amarsri/Documents/workspace/hive/build/ql/test/data/warehouse/src
+                name default.src
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1312276044
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: default.src
+          name: default.src
+      Reduce Operator Tree:
+        Extract
+          Limit
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Group By Operator
+                aggregations:
+                      expr: count(1)
+                bucketGroup: false
+                keys:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_050_527968501217218085/-mr-10002
+                  NumFilesPerFileSink: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types int,string,bigint
+                        escape.delim \
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_050_527968501217218085/-mr-10002
+          Reduce Output Operator
+            key expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+            sort order: ++
+            Map-reduce partition columns:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+            tag: -1
+            value expressions:
+                  expr: _col2
+                  type: bigint
+      Needs Tagging: false
+      Path -> Alias:
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_050_527968501217218085/-mr-10002 [file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_050_527968501217218085/-mr-10002]
+      Path -> Partition:
+        file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_050_527968501217218085/-mr-10002
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col2
+              columns.types int,string,bigint
+              escape.delim \
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col2
+                columns.types int,string,bigint
+                escape.delim \
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: int
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              directory: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_050_527968501217218085/-ext-10001
+              NumFilesPerFileSink: 1
+              Stats Publishing Key Prefix: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_050_527968501217218085/-ext-10001/
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types int:string:bigint
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_212_7537716829689807307/-mr-10000
+POSTHOOK: query: SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-01_212_7537716829689807307/-mr-10000
+1 one
+2 two
+3 three
+PREHOOK: query: SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-09_451_3829925719108420705/-mr-10000
+POSTHOOK: query: SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-08-02_02-08-09_451_3829925719108420705/-mr-10000
+1 one 1
+2 two 1
+3 three 1
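[Reviewer note, not part of the patch] For contrast with the golden outputs above, the pre-existing array path is untouched: the LIST branch still emits one single-column row per element, which is what keeps the unchanged array test cases passing. A similar hypothetical sketch, under the same assumptions as ExplodeMapDemo above (invented class name, standard GenericUDTF driver pattern):

import java.util.Arrays;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.Collector;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical demo class, not part of the patch.
public class ExplodeArrayDemo {
  public static void main(String[] args) throws HiveException {
    GenericUDTFExplode udtf = new GenericUDTFExplode();

    // An array<int> ObjectInspector, matching array(1,2,3).
    ObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);

    // The LIST branch declares the old single-column schema.
    System.out.println(udtf.initialize(new ObjectInspector[] {listOI}).getTypeName());
    // struct<col:int>

    udtf.setCollector(new Collector() {
      public void collect(Object row) throws HiveException {
        System.out.println(((Object[]) row)[0]); // one element per row
      }
    });
    udtf.process(new Object[] {Arrays.asList(1, 2, 3)}); // prints: 1 / 2 / 3
    udtf.close();
  }
}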