diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index a5489a9..d8ac6ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -410,6 +410,10 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException * Currently only used by FetchTask. **/ public boolean pushRow() throws IOException, HiveException { + if (operator == null) { + return false; + } + if (work.getRowsComputedUsingStats() != null) { for (List row : work.getRowsComputedUsingStats()) { operator.process(row, 0); @@ -524,10 +528,7 @@ public void clearFetchContext() throws HiveException { currRecReader.close(); currRecReader = null; } - if (operator != null) { - operator.close(false); - operator = null; - } + closeOperator(); if (context != null) { context.clear(); context = null; @@ -542,6 +543,13 @@ public void clearFetchContext() throws HiveException { } } + public void closeOperator() throws HiveException { + if (operator != null) { + operator.close(false); + operator = null; + } + } + /** * used for bucket map join */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java index ec9e98e..dff1815 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java @@ -145,6 +145,10 @@ public boolean fetch(List res) throws IOException, CommandNeedRetryException { if (work.getLeastNumRows() > 0) { throw new CommandNeedRetryException(); } + + // Closing the operator can sometimes yield more rows (HIVE-11892) + fetch.closeOperator(); + return fetched; } fetched = true; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index b5ceb14..eb0ba7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -289,7 +289,7 @@ private boolean isConvertible(FetchData fetch) { private boolean isConvertible(FetchData fetch, Operator operator, Set> traversed) { if (operator instanceof ReduceSinkOperator || operator instanceof CommonJoinOperator - || operator instanceof ScriptOperator || operator instanceof UDTFOperator) { + || operator instanceof ScriptOperator) { return false; } diff --git a/ql/src/test/results/clientpositive/lateral_view_noalias.q.out b/ql/src/test/results/clientpositive/lateral_view_noalias.q.out index 3604049..3532737 100644 --- a/ql/src/test/results/clientpositive/lateral_view_noalias.q.out +++ b/ql/src/test/results/clientpositive/lateral_view_noalias.q.out @@ -5,20 +5,38 @@ POSTHOOK: query: --HIVE-2608 Do not require AS a,b,c part in LATERAL VIEW EXPLAIN SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Lateral View Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 172000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 172000 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Join Operator + outputColumnNames: _col5, _col6 + Statistics: Num rows: 1000 Data size: 364000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 364000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + Select Operator + expressions: map('key1':100,'key2':200) (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 Statistics: Num rows: 1000 Data size: 364000 Basic stats: COMPLETE Column stats: COMPLETE @@ -29,43 +47,7 @@ STAGE PLANS: Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: map('key1':100,'key2':200) (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Lateral View Join Operator - outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 364000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: string), _col6 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 364000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 2 - Processor Tree: - ListSink + ListSink PREHOOK: query: SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 PREHOOK: type: QUERY @@ -82,39 +64,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: map('key1':100,'key2':200) (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: map('key1':100,'key2':200) (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/nonmr_fetch.q.out b/ql/src/test/results/clientpositive/nonmr_fetch.q.out index 73bbdf5..8a3c2a7 100644 --- a/ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ b/ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -841,22 +841,40 @@ explain select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Lateral View Forward + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + Select Operator + expressions: array(key,value) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + UDTF Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + function name: explode Lateral View Join Operator outputColumnNames: _col0, _col7 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -867,43 +885,7 @@ STAGE PLANS: Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: array(key,value) (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col7 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 20 - Processor Tree: - ListSink + ListSink PREHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/select_dummy_source.q.out b/ql/src/test/results/clientpositive/select_dummy_source.q.out index 86c9d3c..a23c8ad 100644 --- a/ql/src/test/results/clientpositive/select_dummy_source.q.out +++ b/ql/src/test/results/clientpositive/select_dummy_source.q.out @@ -190,37 +190,25 @@ POSTHOOK: query: explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out index c70f104..bdb8830 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -5371,15 +5371,11 @@ Plan not optimized by CBO due to missing feature [Others]. Stage-0 Fetch Operator limit:-1 - Stage-1 - Map 1 - File Output Operator [FS_3] - UDTF Operator [UDTF_2] (rows=1 width=0) - function name:explode - Select Operator [SEL_1] (rows=1 width=0) - Output:["_col0"] - TableScan [TS_0] (rows=1 width=1) - _dummy_database@_dummy_table,_dummy_table,Tbl:COMPLETE,Col:COMPLETE + UDTF Operator [UDTF_2] + function name:explode + Select Operator [SEL_1] + Output:["_col0"] + TableScan [TS_0] PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index f4e21bd..f444058 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -51,28 +51,24 @@ Plan not optimized by CBO. Stage-0 Fetch Operator limit:-1 - Stage-1 - Map 1 - File Output Operator [FS_7] - Select Operator [SEL_6] (rows=4000 width=10) - Output:["_col0","_col1"] - Lateral View Join Operator [LVJ_5] (rows=4000 width=10) - Output:["_col0","_col1","_col7"] - Select Operator [SEL_2] (rows=2000 width=10) + Select Operator [SEL_6] + Output:["_col0","_col1"] + Lateral View Join Operator [LVJ_5] + Output:["_col0","_col1","_col7"] + Select Operator [SEL_2] + Output:["key","value"] + Lateral View Forward [LVF_1] + TableScan [TS_0] Output:["key","value"] - Lateral View Forward [LVF_1] (rows=2000 width=10) - TableScan [TS_0] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - File Output Operator [FS_7] - Select Operator [SEL_6] (rows=4000 width=10) - Output:["_col0","_col1"] - Lateral View Join Operator [LVJ_5] (rows=4000 width=10) - Output:["_col0","_col1","_col7"] - UDTF Operator [UDTF_4] (rows=2000 width=10) - function name:explode - Select Operator [SEL_3] (rows=2000 width=10) - Output:["_col0"] - Please refer to the previous Lateral View Forward [LVF_1] + Select Operator [SEL_6] + Output:["_col0","_col1"] + Lateral View Join Operator [LVJ_5] + Output:["_col0","_col1","_col7"] + UDTF Operator [UDTF_4] + function name:explode + Select Operator [SEL_3] + Output:["_col0"] + Please refer to the previous Lateral View Forward [LVF_1] PREHOOK: query: explain show tables PREHOOK: type: SHOWTABLES diff --git a/ql/src/test/results/clientpositive/tez/select_dummy_source.q.out b/ql/src/test/results/clientpositive/tez/select_dummy_source.q.out index 4f34611..fa99b76 100644 --- a/ql/src/test/results/clientpositive/tez/select_dummy_source.q.out +++ b/ql/src/test/results/clientpositive/tez/select_dummy_source.q.out @@ -71,40 +71,22 @@ explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY @@ -185,40 +167,22 @@ POSTHOOK: query: explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/udf_explode.q.out b/ql/src/test/results/clientpositive/udf_explode.q.out index 8db4375..ea12e80 100644 --- a/ql/src/test/results/clientpositive/udf_explode.q.out +++ b/ql/src/test/results/clientpositive/udf_explode.q.out @@ -13,102 +13,26 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src tablesample (1 rows) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(1,2,3) (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns col - columns.types int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(1,2,3) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src tablesample (1 rows)) a GROUP BY a.myCol PREHOOK: type: QUERY @@ -273,102 +197,26 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src tablesample (1 rows) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: map(1:'one',2:'two',3:'three') (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value - columns.types int:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: map(1:'one',2:'two',3:'three') (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src tablesample (1 rows)) a GROUP BY a.key, a.val PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/udf_inline.q.out b/ql/src/test/results/clientpositive/udf_inline.q.out index 2c5268e..f986abf 100644 --- a/ql/src/test/results/clientpositive/udf_inline.q.out +++ b/ql/src/test/results/clientpositive/udf_inline.q.out @@ -20,39 +20,27 @@ POSTHOOK: query: explain SELECT inline( ) as (id, text) FROM SRC limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(const struct(1,'dude!'),const struct(2,'Wheres'),const struct(3,'my car?')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(1,'dude!'),const struct(2,'Wheres'),const struct(3,'my car?')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT inline( ARRAY( diff --git a/ql/src/test/results/clientpositive/udtf_explode.q.out b/ql/src/test/results/clientpositive/udtf_explode.q.out index 3304109..e067a0a 100644 --- a/ql/src/test/results/clientpositive/udtf_explode.q.out +++ b/ql/src/test/results/clientpositive/udtf_explode.q.out @@ -13,104 +13,28 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(1,2,3) (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns col - columns.types int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: 3 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(1,2,3) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol PREHOOK: type: QUERY @@ -342,39 +266,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: map(1:'one',2:'two',3:'three') (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 3 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: map(1:'one',2:'two',3:'three') (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal PREHOOK: type: QUERY