diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index acafd73..45839ad 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -671,10 +671,11 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
       List<FieldNode> colsAfterReplacement = new ArrayList<>();
       List<FieldNode> newCols = new ArrayList<>();
-      for (FieldNode col : cols) {
-        int index = outputCols.indexOf(col.getFieldName());
+      for (int index = 0; index < numSelColumns; index++) {
+        String colName = outputCols.get(index);
+        FieldNode col = lookupColumn(cols, colName);
         // colExprMap.size() == size of cols from SEL(*) branch
-        if (index >= 0 && index < numSelColumns) {
+        if (col != null) {
           ExprNodeDesc transformed = colExprMap.get(col.getFieldName());
           colsAfterReplacement = mergeFieldNodesWithDesc(colsAfterReplacement, transformed);
           newCols.add(col);
@@ -713,12 +714,14 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
       RowSchema rs = op.getSchema();
       ArrayList<ExprNodeDesc> colList = new ArrayList<>();
       List<FieldNode> outputCols = new ArrayList<>();
-      for (FieldNode col : cols) {
-        // revert output cols of SEL(*) to ExprNodeColumnDesc
-        ColumnInfo colInfo = rs.getColumnInfo(col.getFieldName());
-        ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(colInfo);
-        colList.add(colExpr);
-        outputCols.add(col);
+      for (ColumnInfo colInfo : rs.getSignature()) {
+        FieldNode col = lookupColumn(cols, colInfo.getInternalName());
+        if (col != null) {
+          // revert output cols of SEL(*) to ExprNodeColumnDesc
+          ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(colInfo);
+          colList.add(colExpr);
+          outputCols.add(col);
+        }
       }
       // replace SEL(*) to SEL(exprs)
       ((SelectDesc)select.getConf()).setSelStarNoCompute(false);
diff --git ql/src/test/queries/clientpositive/lateral_view_onview.q ql/src/test/queries/clientpositive/lateral_view_onview.q
index d8fca67..fa559f4 100644
--- ql/src/test/queries/clientpositive/lateral_view_onview.q
+++ ql/src/test/queries/clientpositive/lateral_view_onview.q
@@ -22,3 +22,9 @@ SELECT SIZE(c2),c3,TRIM(c1),c4,myCol from lv_view LATERAL VIEW explode(array(1,2
 SELECT SIZE(c2),c3,TRIM(c1),c4,myCol from lv_view LATERAL VIEW explode(array(1,2,3)) myTab as myCol limit 3;
+CREATE TABLE lv_table1( c1 STRING, c3 INT, c4 CHAR(1), c5 STRING, c6 STRING, c7 STRING, c8 STRING, c9 STRING, c10 STRING, c11 STRING, c12 STRING, c13 STRING);
+CREATE TABLE lv_table2( c1 STRING, c2 ARRAY<INT>);
+INSERT OVERWRITE TABLE lv_table1 SELECT 'abc ', 100, 't', 'test', 'test', 'test', 'test', 'test', 'test', 'test', 'test', 'test' FROM src;
+INSERT OVERWRITE TABLE lv_table2 SELECT 'abc ', array(1,2,3) FROM src;
+EXPLAIN WITH lv_view1 AS (SELECT lv_table1.*, c2 FROM lv_table1 JOIN lv_table2 ON lv_table1.c1 = lv_table2.c1), lv_view2 AS (SELECT * FROM lv_view1 LATERAL VIEW explode(c2) myTable AS myCol) SELECT * FROM lv_view2 SORT BY c1 ASC, myCol ASC LIMIT 1;
+WITH lv_view1 AS (SELECT lv_table1.*, c2 FROM lv_table1 JOIN lv_table2 ON lv_table1.c1 = lv_table2.c1), lv_view2 AS (SELECT * FROM lv_view1 LATERAL VIEW explode(c2) myTable AS myCol) SELECT * FROM lv_view2 SORT BY c1 ASC, myCol ASC LIMIT 1;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/lateral_view_onview.q.out ql/src/test/results/clientpositive/lateral_view_onview.q.out
index 1d3e825..423885e 100644
--- ql/src/test/results/clientpositive/lateral_view_onview.q.out
+++ ql/src/test/results/clientpositive/lateral_view_onview.q.out
@@ -545,11 +545,11 @@ STAGE PLANS:
           Lateral View Forward
             Statistics: Num rows: 500 Data size: 8500 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: _col1 (type: array<int>), _col2 (type: int), _col0 (type: string), _col3 (type: char(1))
-              outputColumnNames: _col1, _col2, _col0, _col3
+              expressions: _col0 (type: string), _col1 (type: array<int>), _col2 (type: int), _col3 (type: char(1))
+              outputColumnNames: _col0, _col1, _col2, _col3
              Statistics: Num rows: 500 Data size: 8500 Basic stats: COMPLETE Column stats: NONE
              Lateral View Join Operator
-                outputColumnNames: _col1, _col2, _col0, _col3, _col4
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Statistics: Num rows: 1000 Data size: 17000 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: size(_col1) (type: int), _col2 (type: int), trim(_col0) (type: string), _col3 (type: char(1)), _col4 (type: int)
@@ -573,7 +573,7 @@ STAGE PLANS:
                Statistics: Num rows: 500 Data size: 8500 Basic stats: COMPLETE Column stats: NONE
                function name: explode
                Lateral View Join Operator
-                  outputColumnNames: _col1, _col2, _col0, _col3, _col4
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
                  Statistics: Num rows: 1000 Data size: 17000 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: size(_col1) (type: int), _col2 (type: int), trim(_col0) (type: string), _col3 (type: char(1)), _col4 (type: int)
@@ -609,3 +609,200 @@ POSTHOOK: Input: default@lv_view
 3	100	abc	t	1
 3	100	abc	t	2
 3	100	abc	t	3
+PREHOOK: query: CREATE TABLE lv_table1( c1 STRING, c3 INT, c4 CHAR(1), c5 STRING, c6 STRING, c7 STRING, c8 STRING, c9 STRING, c10 STRING, c11 STRING, c12 STRING, c13 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lv_table1
+POSTHOOK: query: CREATE TABLE lv_table1( c1 STRING, c3 INT, c4 CHAR(1), c5 STRING, c6 STRING, c7 STRING, c8 STRING, c9 STRING, c10 STRING, c11 STRING, c12 STRING, c13 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lv_table1
+PREHOOK: query: CREATE TABLE lv_table2( c1 STRING, c2 ARRAY<INT>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lv_table2
+POSTHOOK: query: CREATE TABLE lv_table2( c1 STRING, c2 ARRAY<INT>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lv_table2
+PREHOOK: query: INSERT OVERWRITE TABLE lv_table1 SELECT 'abc ', 100, 't', 'test', 'test', 'test', 'test', 'test', 'test', 'test', 'test', 'test' FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@lv_table1
+POSTHOOK: query: INSERT OVERWRITE TABLE lv_table1 SELECT 'abc ', 100, 't', 'test', 'test', 'test', 'test', 'test', 'test', 'test', 'test', 'test' FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@lv_table1
+POSTHOOK: Lineage: lv_table1.c1 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c10 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c11 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c12 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c13 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c3 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c4 EXPRESSION []
+POSTHOOK: Lineage: lv_table1.c5 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c6 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c7 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c8 SIMPLE []
+POSTHOOK: Lineage: lv_table1.c9 SIMPLE []
+PREHOOK: query: INSERT OVERWRITE TABLE lv_table2 SELECT 'abc ', array(1,2,3) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@lv_table2
+POSTHOOK: query: INSERT OVERWRITE TABLE lv_table2 SELECT 'abc ', array(1,2,3) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@lv_table2
+POSTHOOK: Lineage: lv_table2.c1 SIMPLE []
+POSTHOOK: Lineage: lv_table2.c2 EXPRESSION []
+PREHOOK: query: EXPLAIN WITH lv_view1 AS (SELECT lv_table1.*, c2 FROM lv_table1 JOIN lv_table2 ON lv_table1.c1 = lv_table2.c1), lv_view2 AS (SELECT * FROM lv_view1 LATERAL VIEW explode(c2) myTable AS myCol) SELECT * FROM lv_view2 SORT BY c1 ASC, myCol ASC LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN WITH lv_view1 AS (SELECT lv_table1.*, c2 FROM lv_table1 JOIN lv_table2 ON lv_table1.c1 = lv_table2.c1), lv_view2 AS (SELECT * FROM lv_view1 LATERAL VIEW explode(c2) myTable AS myCol) SELECT * FROM lv_view2 SORT BY c1 ASC, myCol ASC LIMIT 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: lv_table1
+            Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: c1 (type: string)
+                sort order: +
+                Map-reduce partition columns: c1 (type: string)
+                Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: c3 (type: int), c4 (type: char(1)), c5 (type: string), c6 (type: string), c7 (type: string), c8 (type: string), c9 (type: string), c10 (type: string), c11 (type: string), c12 (type: string), c13 (type: string)
+          TableScan
+            alias: lv_table2
+            Statistics: Num rows: 500 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: c1 (type: string)
+                sort order: +
+                Map-reduce partition columns: c1 (type: string)
+                Statistics: Num rows: 500 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
+                value expressions: c2 (type: array<int>)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 c1 (type: string)
+            1 c1 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col16
+          Statistics: Num rows: 550 Data size: 30800 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int), _col2 (type: char(1)), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col16 (type: array<int>)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+            Statistics: Num rows: 550 Data size: 30800 Basic stats: COMPLETE Column stats: NONE
+            Lateral View Forward
+              Statistics: Num rows: 550 Data size: 30800 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: int), _col2 (type: char(1)), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: array<int>)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                Statistics: Num rows: 550 Data size: 30800 Basic stats: COMPLETE Column stats: NONE
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                  Statistics: Num rows: 1100 Data size: 61600 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Select Operator
+                expressions: _col12 (type: array<int>)
+                outputColumnNames: _col0
+                Statistics: Num rows: 550 Data size: 30800 Basic stats: COMPLETE Column stats: NONE
+                UDTF Operator
+                  Statistics: Num rows: 550 Data size: 30800 Basic stats: COMPLETE Column stats: NONE
+                  function name: explode
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                    Statistics: Num rows: 1100 Data size: 61600 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col13 (type: int)
+              sort order: ++
+              Statistics: Num rows: 1100 Data size: 61600 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: int), _col2 (type: char(1)), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: array<int>)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: char(1)), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: string), VALUE._col9 (type: string), VALUE._col10 (type: string), VALUE._col11 (type: array<int>), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+          Statistics: Num rows: 1100 Data size: 61600 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col13 (type: int)
+              sort order: ++
+              Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: int), _col2 (type: char(1)), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: array<int>)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: char(1)), VALUE._col2 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: string), VALUE._col9 (type: string), VALUE._col10 (type: string), VALUE._col11 (type: array<int>), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+          Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: WITH lv_view1 AS (SELECT lv_table1.*, c2 FROM lv_table1 JOIN lv_table2 ON lv_table1.c1 = lv_table2.c1), lv_view2 AS (SELECT * FROM lv_view1 LATERAL VIEW explode(c2) myTable AS myCol) SELECT * FROM lv_view2 SORT BY c1 ASC, myCol ASC LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lv_table1
+PREHOOK: Input: default@lv_table2
+#### A masked pattern was here ####
+POSTHOOK: query: WITH lv_view1 AS (SELECT lv_table1.*, c2 FROM lv_table1 JOIN lv_table2 ON lv_table1.c1 = lv_table2.c1), lv_view2 AS (SELECT * FROM lv_view1 LATERAL VIEW explode(c2) myTable AS myCol) SELECT * FROM lv_view2 SORT BY c1 ASC, myCol ASC LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lv_table1
+POSTHOOK: Input: default@lv_table2
+#### A masked pattern was here ####
+abc 	100	t	test	test	test	test	test	test	test	test	test	[1,2,3]	1
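Reviewer note (illustration only, not part of the patch): the ColumnPrunerProcFactory change above iterates the SELECT's output columns in schema order and looks each one up in the pruned-column list, instead of iterating the pruned list and searching the schema. The sketch below shows why that matters for the .q.out change: the old loop emits columns in whatever order the pruned list happens to have (_col1, _col2, _col0, _col3), while the new loop emits them in schema order (_col0.._col3). Plain Strings stand in for Hive's FieldNode/ColumnInfo types, and this lookupColumn is a simplified stand-in for the helper of the same name.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PruneOrderSketch {
  // Simplified stand-in for ColumnPrunerProcFactory.lookupColumn: return the
  // pruned column matching the given name, or null if it was pruned away.
  static String lookupColumn(List<String> cols, String name) {
    return cols.contains(name) ? name : null;
  }

  public static void main(String[] args) {
    // Schema order of the SELECT's output columns.
    List<String> outputCols = Arrays.asList("_col0", "_col1", "_col2", "_col3");
    // Pruned columns arrive in an arbitrary order, as in the old .q.out.
    List<String> cols = Arrays.asList("_col1", "_col2", "_col0", "_col3");

    // Old loop: iterate the pruned list; output order follows that list.
    List<String> oldOrder = new ArrayList<>();
    for (String col : cols) {
      if (outputCols.indexOf(col) >= 0) {
        oldOrder.add(col);
      }
    }

    // New loop: iterate the schema order and look each column up instead.
    List<String> newOrder = new ArrayList<>();
    for (String colName : outputCols) {
      String col = lookupColumn(cols, colName);
      if (col != null) {
        newOrder.add(col);
      }
    }

    System.out.println("old: " + oldOrder); // old: [_col1, _col2, _col0, _col3]
    System.out.println("new: " + newOrder); // new: [_col0, _col1, _col2, _col3]
  }
}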