Index: contrib/src/test/results/clientpositive/udtf_output_on_close.q.out
===================================================================
--- contrib/src/test/results/clientpositive/udtf_output_on_close.q.out	(revision 0)
+++ contrib/src/test/results/clientpositive/udtf_output_on_close.q.out	(revision 0)
@@ -0,0 +1,28 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION udtfCount2 AS 'org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFCount2'
+PREHOOK: type: CREATEFUNCTION
+POSTHOOK: query: CREATE TEMPORARY FUNCTION udtfCount2 AS 'org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFCount2'
+POSTHOOK: type: CREATEFUNCTION
+PREHOOK: query: SELECT udtfCount2(key) AS count FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/contrib/scratchdir/hive_2010-04-07_19-27-17_031_6344870299379030501/10000
+POSTHOOK: query: SELECT udtfCount2(key) AS count FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/contrib/scratchdir/hive_2010-04-07_19-27-17_031_6344870299379030501/10000
+500
+500
+PREHOOK: query: SELECT * FROM src LATERAL VIEW udtfCount2(key) myTable AS myCol
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/contrib/scratchdir/hive_2010-04-07_19-27-21_181_94664289955554193/10000
+POSTHOOK: query: SELECT * FROM src LATERAL VIEW udtfCount2(key) myTable AS myCol
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/contrib/scratchdir/hive_2010-04-07_19-27-21_181_94664289955554193/10000
+97	val_97	500
+97	val_97	500
+PREHOOK: query: DROP TEMPORARY FUNCTION udtfCount2
+PREHOOK: type: DROPFUNCTION
+POSTHOOK: query: DROP TEMPORARY FUNCTION udtfCount2
+POSTHOOK: type: DROPFUNCTION
Index: contrib/src/test/queries/clientpositive/udtf_output_on_close.q
===================================================================
--- contrib/src/test/queries/clientpositive/udtf_output_on_close.q	(revision 0)
+++ contrib/src/test/queries/clientpositive/udtf_output_on_close.q	(revision 0)
@@ -0,0 +1,9 @@
+add jar ../build/contrib/hive_contrib.jar;
+
+CREATE TEMPORARY FUNCTION udtfCount2 AS 'org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFCount2';
+
+SELECT udtfCount2(key) AS count FROM src;
+
+SELECT * FROM src LATERAL VIEW udtfCount2(key) myTable AS myCol;
+
+DROP TEMPORARY FUNCTION udtfCount2;
\ No newline at end of file
Index: contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java
===================================================================
--- contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java	(revision 0)
+++ contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java	(revision 0)
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.contrib.udtf.example;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+
+/**
+ * GenericUDTFCount2 outputs the number of rows seen, twice. The count is
+ * emitted twice in order to test forwarding rows from close() under a
+ * lateral view.
+ *
+ */
+public class GenericUDTFCount2 extends GenericUDTF {
+
+  Integer count = Integer.valueOf(0);
+  Object forwardObj[] = new Object[1];
+
+  @Override
+  public void close() throws HiveException {
+    forwardObj[0] = count;
+    forward(forwardObj);
+    forward(forwardObj);
+  }
+
+  @Override
+  public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
+    ArrayList<String> fieldNames = new ArrayList<String>();
+    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
+    fieldNames.add("col1");
+    fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
+    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
+        fieldOIs);
+  }
+
+  @Override
+  public void process(Object[] args) throws HiveException {
+    count = Integer.valueOf(count.intValue() + 1);
+  }
+
+}
Index: ql/src/gen-py/queryplan/ttypes.py
===================================================================
--- ql/src/gen-py/queryplan/ttypes.py	(revision 7351)
+++ ql/src/gen-py/queryplan/ttypes.py	(working copy)
@@ -38,6 +38,7 @@
   UNION = 12
   UDTF = 13
   LATERALVIEWJOIN = 14
+  LATERALVIEWFORWARD = 15
 
 class TaskType:
   MAP = 0
Index: ql/src/test/results/clientpositive/lateral_view_ppd.q.out
===================================================================
--- ql/src/test/results/clientpositive/lateral_view_ppd.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/lateral_view_ppd.q.out	(revision 0)
@@ -0,0 +1,545 @@
+PREHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) '0'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> 
Map Operator Tree: + a:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key = '0') + type: boolean + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: (_col0 = '0') + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: (_col0 = '0') + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_16-08-14_652_3272602982878468302/10000 +POSTHOOK: query: SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_16-08-14_652_3272602982878468302/10000 +val_0 1 +val_0 2 +val_0 3 +val_0 1 +val_0 2 +val_0 3 +val_0 1 +val_0 2 +val_0 3 +PREHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' AND myCol=1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' AND myCol=1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) '0') (= (TOK_TABLE_OR_COL myCol) 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key = '0') + type: boolean + Lateral View Forward + Select Operator + SELECT 
* : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: ((_col0 = '0') and (_col2 = 1)) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: ((_col0 = '0') and (_col2 = 1)) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' AND myCol=1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_16-08-19_184_3947037482170607721/10000 +POSTHOOK: query: SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' AND myCol=1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_16-08-19_184_3947037482170607721/10000 +val_0 1 +val_0 1 +val_0 1 +PREHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) "12"))) (TOK_LIMIT 12))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a:srcpart + TableScan + alias: srcpart + Filter Operator + predicate: + expr: ((ds = '2008-04-08') and (hr = '12')) + type: boolean + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col2 + type: string + expr: _col3 + type: string + outputColumnNames: _col1, _col4, _col2, _col3 + Filter Operator + predicate: + expr: ((_col2 = '2008-04-08') and (_col3 = '12')) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col4 + type: int + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col2 + type: string + expr: _col3 + type: string + outputColumnNames: _col1, _col4, _col2, _col3 + Filter Operator + predicate: + expr: ((_col2 = '2008-04-08') and (_col3 = '12')) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col4 + type: int + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 12 + + +PREHOOK: query: SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_16-08-25_323_2549037778500077245/10000 +POSTHOOK: query: SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_16-08-25_323_2549037778500077245/10000 +val_238 1 +val_238 2 +val_238 3 +val_86 1 +val_86 2 +val_86 3 +val_311 1 +val_311 2 +val_311 3 +val_27 1 +val_27 2 +val_27 3 +PREHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol2 (TOK_TABALIAS myTable2))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) '0')))) + +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key = '0') + type: boolean + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col0 + type: string + outputColumnNames: _col1, _col2, _col0 + Filter Operator + predicate: + expr: (_col0 = '0') + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col0 + type: string + outputColumnNames: _col1, _col2, _col0 + Filter Operator + predicate: + expr: (_col0 = '0') + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col0 + type: string + outputColumnNames: _col1, _col2, _col0 + Filter Operator + predicate: + expr: (_col0 = '0') + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col0 + type: string + outputColumnNames: _col1, _col2, _col0 + Filter Operator + predicate: + expr: (_col0 = '0') + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + 
limit: -1 + + +PREHOOK: query: SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_16-08-30_014_6879501059714270125/10000 +POSTHOOK: query: SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-08_16-08-30_014_6879501059714270125/10000 +val_0 1 +val_0 1 +val_0 1 +val_0 2 +val_0 2 +val_0 2 +val_0 3 +val_0 3 +val_0 3 +val_0 1 +val_0 1 +val_0 1 +val_0 2 +val_0 2 +val_0 2 +val_0 3 +val_0 3 +val_0 3 +val_0 1 +val_0 1 +val_0 1 +val_0 2 +val_0 2 +val_0 2 +val_0 3 +val_0 3 +val_0 3 Index: ql/src/test/results/clientpositive/lateral_view.q.out =================================================================== --- ql/src/test/results/clientpositive/lateral_view.q.out (revision 7351) +++ ql/src/test/results/clientpositive/lateral_view.q.out (working copy) @@ -32,41 +32,9 @@ src TableScan alias: src - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: int - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 - type: int - sort order: ++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: int - Select Operator - expressions: - expr: array(1,2,3) - type: array - outputColumnNames: _col0 - UDTF Operator - function name: explode + Lateral View Forward + Select Operator + SELECT * : (no compute) Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 Select Operator @@ -93,6 +61,39 @@ type: string expr: _col2 type: int + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: int + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col2 + type: int + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: int Reduce Operator Tree: Extract Limit @@ -106,7 +107,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-16_286_7545304927031880480/10002 + file:/tmp/pyang/hive_2010-07-12_18-55-27_411_1145974600848861508/10002 Reduce Output Operator key expressions: expr: _col0 @@ -156,29 +157,9 @@ src TableScan alias: src - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col2 - type: int - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Select Operator - expressions: - expr: array(1,2,3) - type: array - outputColumnNames: _col0 - UDTF Operator - function name: explode + Lateral View Forward + Select Operator + SELECT * : (no compute) Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 Select Operator @@ -193,6 +174,27 @@ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col2 + type: int + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator @@ -218,85 +220,14 @@ src TableScan alias: src - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: int - expr: _col3 - type: string - outputColumnNames: _col0, _col1 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Select Operator - expressions: - expr: array('a','b','c') - type: array - outputColumnNames: _col0 - UDTF Operator - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: int - expr: _col3 - type: string - outputColumnNames: _col0, _col1 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Select Operator - expressions: - expr: array(1,2,3) - type: array - outputColumnNames: _col0 - UDTF Operator - function name: explode + Lateral View Forward + Select Operator + SELECT * : (no compute) Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: int - expr: _col3 - type: string - outputColumnNames: _col0, _col1 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Select Operator - expressions: - expr: array('a','b','c') - type: array - outputColumnNames: _col0 - UDTF Operator - function name: explode + Lateral View Forward + Select Operator + SELECT * : (no compute) Lateral View Join Operator outputColumnNames: _col0, _col1, _col2, _col3 Select Operator @@ -313,6 +244,80 @@ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array('a','b','c') + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator 
+ outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array('a','b','c') + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator @@ -338,79 +343,14 @@ src TableScan alias: src - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - expressions: - expr: _col3 - type: int - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Select Operator - expressions: - expr: _col2 - type: array - outputColumnNames: _col0 - UDTF Operator - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - expressions: - expr: _col3 - type: int - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Select Operator - expressions: - expr: array(array(1,2,3)) - type: array> - outputColumnNames: _col0 - UDTF Operator - function name: explode + Lateral View Forward + Select Operator + SELECT * : (no compute) Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - expressions: - expr: _col3 - type: int - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Select Operator - expressions: - expr: _col2 - type: array - outputColumnNames: _col0 - UDTF Operator - function name: 
explode + Lateral View Forward + Select Operator + SELECT * : (no compute) Lateral View Join Operator outputColumnNames: _col0, _col1, _col2, _col3 Select Operator @@ -425,6 +365,74 @@ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: _col2 + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col3 + type: int + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(array(1,2,3)) + type: array> + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Lateral View Forward + Select Operator + SELECT * : (no compute) + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col3 + type: int + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: _col2 + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col3 + type: int + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator @@ -435,24 +443,24 @@ SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-16_764_354722541681050276/10000 +PREHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-28_009_6995650598612404812/10000 POSTHOOK: query: -- Verify that * selects columns from both tables SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-16_764_354722541681050276/10000 +POSTHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-28_009_6995650598612404812/10000 POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 0 val_0 1 PREHOOK: query: -- TABLE.* should be supported SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-23_656_7186983670906657556/10000 +PREHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-34_711_2463279077825371084/10000 POSTHOOK: query: -- TABLE.* should be supported SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3 POSTHOOK: type: 
QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-23_656_7186983670906657556/10000 +POSTHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-34_711_2463279077825371084/10000 POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 1 2 @@ -461,12 +469,12 @@ SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-27_321_7329581422596695757/10000 +PREHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-38_741_540021488120755230/10000 POSTHOOK: query: -- Multiple lateral views should result in a Cartesian product SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-27_321_7329581422596695757/10000 +POSTHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-38_741_540021488120755230/10000 POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 1 a 1 b @@ -481,12 +489,12 @@ SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-31_056_3810278535508636145/10000 +PREHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-42_193_8532007851928358632/10000 POSTHOOK: query: -- Should be able to reference tables generated earlier SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-31_056_3810278535508636145/10000 +POSTHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-42_193_8532007851928358632/10000 POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 1 2 @@ -512,29 +520,9 @@ tmp_pyang_lv TableScan alias: tmp_pyang_lv - Select Operator - SELECT * : (no compute) - Lateral View Join Operator - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col1 - type: int - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Select Operator - expressions: - expr: array(1,2,3) - type: array - outputColumnNames: _col0 - UDTF Operator - function name: explode + Lateral View Forward + Select Operator + SELECT * : (no compute) Lateral View Join Operator outputColumnNames: _col0, _col1 Select Operator @@ -549,6 +537,27 @@ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: array(1,2,3) + type: array + 
outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col1 + type: int + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator @@ -558,11 +567,11 @@ PREHOOK: query: select myCol from tmp_pyang_lv LATERAL VIEW explode(array(1,2,3)) myTab as myCol limit 3 PREHOOK: type: QUERY PREHOOK: Input: default@tmp_pyang_lv -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-35_530_6647775029322914068/10000 +PREHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-45_516_3682011975992575089/10000 POSTHOOK: query: select myCol from tmp_pyang_lv LATERAL VIEW explode(array(1,2,3)) myTab as myCol limit 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@tmp_pyang_lv -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-06-18_19-54-35_530_6647775029322914068/10000 +POSTHOOK: Output: file:/tmp/pyang/hive_2010-07-12_18-55-45_516_3682011975992575089/10000 POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 1 2 Index: ql/src/test/queries/clientpositive/lateral_view_ppd.q =================================================================== --- ql/src/test/queries/clientpositive/lateral_view_ppd.q (revision 0) +++ ql/src/test/queries/clientpositive/lateral_view_ppd.q (revision 0) @@ -0,0 +1,13 @@ +set hive.optimize.ppd=true; + +EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0'; +SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0'; + +EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' AND myCol=1; +SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE key='0' AND myCol=1; + +EXPLAIN SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12; +SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12; + +EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0'; +SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0'; Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (revision 7351) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java (working copy) @@ -45,16 +45,16 @@ * of the filter expressions can be pushed nearer to the operator that sees this * particular data for the first time. e.g. 
select a.*, b.* from a join b on
  * (a.col1 = b.col1) where a.col1 > 20 and b.col2 > 40
- * 
+ *
  * For the above query, the predicates (a.col1 > 20) and (b.col2 > 40), without
  * predicate pushdown, would be evaluated after the join processing has been
  * done. Suppose the two predicates filter out most of the rows from a and b,
  * the join is unnecessarily processing these rows. With predicate pushdown,
  * these two predicates will be processed before the join.
- * 
+ *
  * Predicate pushdown is enabled by setting hive.optimize.ppd to true. It is
  * disabled by default.
- * 
+ *
  * The high-level algorithm is described here - An operator is processed after
  * all its children have been processed - An operator processes its own
  * predicates and then merges (conjunction) with the processed predicates of its
@@ -90,6 +90,8 @@
     opRules.put(new RuleRegExp("R5", "TS%"), OpProcFactory.getTSProc());
     opRules.put(new RuleRegExp("R6", "SCR%"), OpProcFactory.getSCRProc());
     opRules.put(new RuleRegExp("R6", "LIM%"), OpProcFactory.getLIMProc());
+    opRules.put(new RuleRegExp("R7", "UDTF%"), OpProcFactory.getUDTFProc());
+    opRules.put(new RuleRegExp("R8", "LVF%"), OpProcFactory.getLVFProc());
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java	(revision 7351)
+++ ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java	(working copy)
@@ -81,6 +81,25 @@
 
   }
 
+  public static class LateralViewForwardPPD extends DefaultPPD implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      LOG.info("Processing for " + nd.getName() + "("
+          + ((Operator) nd).getIdentifier() + ")");
+      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
+
+      ExprWalkerInfo childPreds = owi
+          .getPrunedPreds((Operator<? extends Serializable>) nd.getChildren()
+              .get(0));
+
+      owi.putPrunedPreds((Operator<? extends Serializable>) nd, childPreds);
+      return null;
+    }
+
+  }
+
   /**
    * Combines predicates of its child into a single expression and adds a filter
    * op as new child.
@@ -169,7 +188,7 @@
    * thus disallowing predicate expr containing both tables a and b (such as
    * a.c3 + a.c4 > 20). Such predicates also can be pushed just above the
    * second join and below the first join
-   * 
+   *
    * @param op
    *          Join Operator
    * @param rr
@@ -221,7 +240,7 @@
 
   /**
    * Processor for ReduceSink operator.
-   * 
+   *
    */
   public static class ReduceSinkPPD extends DefaultPPD implements NodeProcessor {
     @Override
@@ -277,7 +296,7 @@
     /**
      * Takes the current operator's pushdown predicates and merges them with
      * its children's pushdown predicates.
-     * 
+     *
      * @param nd
      *          current operator
      * @param owi
@@ -408,6 +427,14 @@
     return new ScriptPPD();
   }
 
+  public static NodeProcessor getUDTFProc() {
+    return new ScriptPPD();
+  }
+
+  public static NodeProcessor getLVFProc() {
+    return new LateralViewForwardPPD();
+  }
+
   private OpProcFactory() {
     // prevent instantiation
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java	(revision 7351)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java	(working copy)
@@ -24,35 +24,50 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /**
- * The lateral view join operator is used to implement the lateral view
- * functionality. This operator was implemented with the following operator DAG
- * in mind. For a query such as
- * 
+ * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
+ * This operator was implemented with the following operator DAG in mind.
+ *
+ * For a query such as
+ *
  * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
  * adid
- * 
- * The top of the operator tree will look similar to
- * 
- * [Table Scan] / \ [Select](*) [Select](adid_list) | | | [UDTF] (explode) \ /
- * [Lateral View Join] | | [Select] (pageid, adid.*) | ....
- * 
- * Rows from the table scan operator are first sent to two select operators. The
+ *
+ * The top of the operator DAG will look similar to
+ *
+ *            [Table Scan]
+ *                 |
+ *       [Lateral View Forward]
+ *              /      \
+ *   [Select](*)        [Select](adid_list)
+ *            |          |
+ *            |         [UDTF] (explode)
+ *             \        /
+ *       [Lateral View Join]
+ *                 |
+ *                 |
+ *      [Select] (pageid, adid.*)
+ *                 |
+ *                ....
+ *
+ * Rows from the table scan operator are first sent to a lateral view forward
+ * operator that just forwards the row and marks the start of a LV. The
  * select operator on the left picks all the columns while the select operator
  * on the right picks only the columns needed by the UDTF.
- * 
+ *
  * The output of select in the left branch and output of the UDTF in the right
  * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
  * will generate > 1 row for every row received from the TS, while the left
  * select operator will generate only one. For each row output from the TS, the
  * LVJ outputs all possible rows that can be created by joining the row from the
  * left select and one of the rows output from the UDTF.
- * 
+ *
+ * Additional lateral views can be supported by adding a similar DAG after the
+ * previous LVJ operator.
 */
@@ -75,6 +90,7 @@
     // The output of the lateral view join will be the columns from the select
     // parent, followed by the column from the UDTF parent
     StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[SELECT_TAG];
+
     List<? extends StructField> sfs = soi.getAllStructFieldRefs();
     for (StructField sf : sfs) {
       ois.add(sf.getFieldObjectInspector());
@@ -126,4 +142,9 @@
     return "LVJ";
   }
 
+  @Override
+  public int getType() {
+    return OperatorType.LATERALVIEWJOIN;
+  }
+
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java	(revision 7351)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java	(working copy)
@@ -51,7 +51,6 @@
    * sends periodic reports back to the tracker.
    */
   transient AutoProgressor autoProgressor;
-  transient boolean closeCalled = false;
 
   @Override
   protected void initializeOp(Configuration hconf) throws HiveException {
@@ -103,14 +102,11 @@
    * forwardUDTFOutput is typically called indirectly by the GenericUDTF when
    * the GenericUDTF has generated output rows that should be passed on to the
    * next operator(s) in the DAG.
-   * 
+   *
    * @param o
    * @throws HiveException
    */
   public void forwardUDTFOutput(Object o) throws HiveException {
-    if (closeCalled) {
-      throw new HiveException("UDTF's should not output rows on close");
-    }
     // Since the output of the UDTF is a struct, we can just forward that
     forward(o, outputObjInspector);
   }
@@ -127,7 +123,6 @@
 
   @Override
   protected void closeOp(boolean abort) throws HiveException {
-    closeCalled = true;
     conf.getGenericUDTF().close();
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java	(revision 7351)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java	(working copy)
@@ -29,6 +29,7 @@
 import org.apache.hadoop.hive.ql.plan.ForwardDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
 import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
 import org.apache.hadoop.hive.ql.plan.LimitDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
@@ -82,6 +83,8 @@
     opvec.add(new OpTuple<UDTFDesc>(UDTFDesc.class, UDTFOperator.class));
     opvec.add(new OpTuple<LateralViewJoinDesc>(LateralViewJoinDesc.class,
         LateralViewJoinOperator.class));
+    opvec.add(new OpTuple<LateralViewForwardDesc>(LateralViewForwardDesc.class,
+        LateralViewForwardOperator.class));
   }
 
   public static <T extends Serializable> Operator<T> get(Class<T> opClass) {
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewForwardOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewForwardOperator.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewForwardOperator.java	(revision 0)
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+
+/**
+ * LateralViewForwardOperator. This operator sits at the head of the operator
+ * DAG for a lateral view. It does no processing of its own, but it lets
+ * predicate pushdown identify, during its traversal, where a lateral view
+ * occurs.
+ *
+ */
+public class LateralViewForwardOperator extends Operator<LateralViewForwardDesc> {
+
+  private static final long serialVersionUID = 1L;
+
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    forward(row, inputObjInspectors[tag]);
+  }
+
+  @Override
+  public String getName() {
+    return "LVF";
+  }
+
+  @Override
+  public int getType() {
+    return OperatorType.LATERALVIEWFORWARD;
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewForwardDesc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewForwardDesc.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewForwardDesc.java	(revision 0)
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+
+/**
+ * LateralViewForwardDesc.
+ *
+ */
+@Explain(displayName = "Lateral View Forward")
+public class LateralViewForwardDesc implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  public LateralViewForwardDesc() {
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 7351)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -122,6 +122,7 @@
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
 import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
 import org.apache.hadoop.hive.ql.plan.LimitDesc;
 import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
@@ -5609,21 +5610,26 @@
       // TS -> SelectOperator(*) -> LateralViewJoinOperator
       // TS -> SelectOperator (gets cols for UDTF) -> UDTFOperator0
       //       -> LateralViewJoinOperator
+      //
+      RowResolver lvForwardRR = opParseCtx.get(op).getRR();
+      Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(
+          new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()),
+          op), lvForwardRR);
+
       // The order in which the two paths are added is important. The
       // lateral view join operator depends on having the select operator
       // give it the row first.
 
-      // Get the all path by making a select(*)
-      RowResolver allPathRR = opParseCtx.get(op).getRR();
+      // Get the all path by making a select(*).
+      RowResolver allPathRR = opParseCtx.get(lvForward).getRR();
       Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(
-          new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()),
-          op), allPathRR);
-
+          new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()),
+          lvForward), allPathRR);
       // Get the UDTF Path
       QB blankQb = new QB(null, null, false);
       Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
-          .getChild(0), blankQb, op);
+          .getChild(0), blankQb, lvForward);
       RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRR();
 
       // Merge the two into the lateral view join
@@ -5637,10 +5643,26 @@
       LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames);
       LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames);
 
+      // For PPD, we need a column to expression map so that during the walk,
+      // the processor knows how to transform the internal col names.
+      // Following steps are dependent on the fact that we called
+      // LVmergeRowResolvers in the above order
+      Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+
+      int i = 0;
+      for (ColumnInfo c : allPathRR.getColumnInfos()) {
+        String internalName = getColumnInternalName(i);
+        i++;
+        colExprMap.put(internalName,
+            new ExprNodeColumnDesc(c.getType(), c.getInternalName(),
+                c.getTabAlias(), c.getIsPartitionCol()));
+      }
+
       Operator lateralViewJoin = putOpInsertMap(OperatorFactory
           .getAndMakeChild(new LateralViewJoinDesc(outputInternalColNames),
-              new RowSchema(lateralViewRR.getColumnInfos()), allPath,
-              udtfPath), lateralViewRR);
+              new RowSchema(lateralViewRR.getColumnInfos()), allPath,
+              udtfPath), lateralViewRR);
+      lateralViewJoin.setColumnExprMap(colExprMap);
       op = lateralViewJoin;
     }
     e.setValue(op);
Index: ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
===================================================================
--- ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java	(revision 7351)
+++ ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java	(working copy)
@@ -29,6 +29,7 @@
   public static final int UNION = 12;
   public static final int UDTF = 13;
   public static final int LATERALVIEWJOIN = 14;
+  public static final int LATERALVIEWFORWARD = 15;
 
   public static final IntRangeSet VALID_VALUES = new IntRangeSet(
     JOIN,
@@ -45,7 +46,8 @@
     REDUCESINK,
     UNION,
     UDTF,
-    LATERALVIEWJOIN );
+    LATERALVIEWJOIN,
+    LATERALVIEWFORWARD );
 
   public static final Map<Integer, String> VALUES_TO_NAMES = new HashMap<Integer, String>() {{
     put(JOIN, "JOIN");
@@ -63,5 +65,6 @@
     put(UNION, "UNION");
     put(UDTF, "UDTF");
     put(LATERALVIEWJOIN, "LATERALVIEWJOIN");
+    put(LATERALVIEWFORWARD, "LATERALVIEWFORWARD");
   }};
 }
Index: ql/src/gen-php/queryplan_types.php
===================================================================
--- ql/src/gen-php/queryplan_types.php	(revision 7351)
+++ ql/src/gen-php/queryplan_types.php	(working copy)
@@ -51,6 +51,7 @@
   'UNION' => 12,
   'UDTF' => 13,
   'LATERALVIEWJOIN' => 14,
+  'LATERALVIEWFORWARD' => 15,
 );
 
 final class OperatorType {
@@ -69,6 +70,7 @@
   const UNION = 12;
   const UDTF = 13;
   const LATERALVIEWJOIN = 14;
+  const LATERALVIEWFORWARD = 15;
   static public $__names = array(
     0 => 'JOIN',
     1 => 'MAPJOIN',
@@ -85,6 +87,7 @@
     12 => 'UNION',
     13 => 'UDTF',
     14 => 'LATERALVIEWJOIN',
+    15 => 'LATERALVIEWFORWARD',
   );
 }
Index: ql/if/queryplan.thrift
===================================================================
--- ql/if/queryplan.thrift	(revision 7351)
+++ ql/if/queryplan.thrift	(working copy)
@@ -16,7 +16,7 @@
 }
 
 #Represents an operator along with its counters
-enum OperatorType { JOIN, MAPJOIN, EXTRACT, FILTER, FORWARD, GROUPBY, LIMIT, SCRIPT, SELECT, TABLESCAN, FILESINK, REDUCESINK, UNION, UDTF, LATERALVIEWJOIN }
+enum OperatorType { JOIN, MAPJOIN, EXTRACT, FILTER, FORWARD, GROUPBY, LIMIT, SCRIPT, SELECT, TABLESCAN, FILESINK, REDUCESINK, UNION, UDTF, LATERALVIEWJOIN, LATERALVIEWFORWARD }
 
 struct Operator {
 1: string operatorId,
 2: OperatorType operatorType,
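
With the closeCalled guard removed from UDTFOperator, a GenericUDTF may now mix rows streamed from process() with rows emitted from close(). The sketch below illustrates that pattern under the same assumptions as the contrib example: the class name GenericUDTFRunningCount (and any function name registered for it) is hypothetical and is not part of this patch, but every API call mirrors GenericUDTFCount2 above. It forwards a running count for each input row and then emits the final total once more from close().

package org.apache.hadoop.hive.contrib.udtf.example;

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * Hypothetical example, not part of this patch: forwards a running count for
 * every input row, then forwards the final total once more from close().
 * Before this patch, the close()-time forward would have thrown
 * "UDTF's should not output rows on close" from UDTFOperator.
 */
public class GenericUDTFRunningCount extends GenericUDTF {

  Integer count = Integer.valueOf(0);
  Object forwardObj[] = new Object[1];

  @Override
  public StructObjectInspector initialize(ObjectInspector[] argOIs)
      throws UDFArgumentException {
    // Single int output column, the same shape as GenericUDTFCount2.
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    fieldNames.add("col1");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
        fieldOIs);
  }

  @Override
  public void process(Object[] args) throws HiveException {
    // Streamed output: one row out per row in.
    count = Integer.valueOf(count.intValue() + 1);
    forwardObj[0] = count;
    forward(forwardObj);
  }

  @Override
  public void close() throws HiveException {
    // Deferred output: legal now that UDTFOperator no longer tracks
    // closeCalled and rejects rows forwarded after closeOp() begins.
    forwardObj[0] = count;
    forward(forwardObj);
  }
}

Registration would follow the same shape as udtf_output_on_close.q: add the contrib jar, then CREATE TEMPORARY FUNCTION with the hypothetical class name, and invoke it either directly in a SELECT or behind LATERAL VIEW.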