diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java index 2a0c469..6f0308d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java @@ -253,7 +253,8 @@ private static void getNestedColsFromExprNodeDesc( if (childDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) childDesc; if (funcDesc.getGenericUDF() instanceof GenericUDFIndex) { - getNestedColsFromExprNodeDesc(funcDesc, pathToRoot, paths); + paths.clear(); + getNestedColsFromExprNodeDesc(funcDesc, null, paths); return; } } diff --git ql/src/test/queries/clientpositive/nested_column_pruning.q ql/src/test/queries/clientpositive/nested_column_pruning.q index 35de3ed..9cfbcbf 100644 --- ql/src/test/queries/clientpositive/nested_column_pruning.q +++ ql/src/test/queries/clientpositive/nested_column_pruning.q @@ -11,15 +11,16 @@ CREATE TABLE nested_tbl_1 ( a int, s1 struct, f6: int>, s2 struct, f11: map>>, - s3 struct>>, - s4 map> + s3 struct>>>, + s4 map>>>> ) STORED AS PARQUET; INSERT INTO TABLE nested_tbl_1 SELECT 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), - named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), - map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)) + named_struct('f12', array(named_struct('f13', 'foo', 'f14', named_struct('f15', 14)), named_struct('f13', 'bar', 'f14', named_struct('f15', 28)))), + map('key1', named_struct('f16', array(named_struct('f17', named_struct('f18', 1)))), + 'key2', named_struct('f16', array(named_struct('f17', named_struct('f18', 2))))) FROM dummy; DROP TABLE IF EXISTS nested_tbl_2; @@ -28,8 +29,9 @@ CREATE TABLE nested_tbl_2 LIKE nested_tbl_1; INSERT INTO TABLE nested_tbl_2 SELECT 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), - named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), - map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)) + named_struct('f12', array(named_struct('f13', 'bar', 'f14', named_struct('f15', 28)), named_struct('f13', 'foo', 'f14', named_struct('f15', 56)))), + map('key3', named_struct('f16', array(named_struct('f17', named_struct('f18', 3)))), + 'key4', named_struct('f16', array(named_struct('f17', named_struct('f18', 4))))) FROM dummy; -- Testing only select statements @@ -129,19 +131,19 @@ SELECT * FROM nested_tbl_3; -- Testing select struct field from elements in array or map EXPLAIN -SELECT count(s1.f6), s3.f12[0].f14 +SELECT count(s1.f6), s3.f12[0].f14.f15 FROM nested_tbl_1 -GROUP BY s3.f12[0].f14; +GROUP BY s3.f12[0].f14.f15; -SELECT count(s1.f6), s3.f12[0].f14 +SELECT count(s1.f6), s3.f12[0].f14.f15 FROM nested_tbl_1 -GROUP BY s3.f12[0].f14; +GROUP BY s3.f12[0].f14.f15; EXPLAIN -SELECT count(s1.f6), s4['key1'].f15 +SELECT count(s1.f6), s4['key1'].f16[0].f17.f18 FROM nested_tbl_1 -GROUP BY s4['key1'].f15; +GROUP BY s4['key1'].f16[0].f17.f18; -SELECT count(s1.f6), s4['key1'].f15 +SELECT count(s1.f6), s4['key1'].f16[0].f17.f18 FROM nested_tbl_1 -GROUP BY s4['key1'].f15; +GROUP BY s4['key1'].f16[0].f17.f18; diff --git ql/src/test/results/clientpositive/nested_column_pruning.q.out ql/src/test/results/clientpositive/nested_column_pruning.q.out index da2908c..0923ce1 100644 --- ql/src/test/results/clientpositive/nested_column_pruning.q.out +++ ql/src/test/results/clientpositive/nested_column_pruning.q.out @@ -27,8 +27,8 @@ PREHOOK: query: CREATE TABLE nested_tbl_1 ( a int, s1 struct, f6: int>, s2 struct, f11: map>>, - s3 struct>>, - s4 map> + s3 struct>>>, + s4 map>>>> ) STORED AS PARQUET PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -37,8 +37,8 @@ POSTHOOK: query: CREATE TABLE nested_tbl_1 ( a int, s1 struct, f6: int>, s2 struct, f11: map>>, - s3 struct>>, - s4 map> + s3 struct>>>, + s4 map>>>> ) STORED AS PARQUET POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -46,8 +46,9 @@ POSTHOOK: Output: default@nested_tbl_1 PREHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), - named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), - map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)) + named_struct('f12', array(named_struct('f13', 'foo', 'f14', named_struct('f15', 14)), named_struct('f13', 'bar', 'f14', named_struct('f15', 28)))), + map('key1', named_struct('f16', array(named_struct('f17', named_struct('f18', 1)))), + 'key2', named_struct('f16', array(named_struct('f17', named_struct('f18', 2))))) FROM dummy PREHOOK: type: QUERY PREHOOK: Input: default@dummy @@ -55,8 +56,9 @@ PREHOOK: Output: default@nested_tbl_1 POSTHOOK: query: INSERT INTO TABLE nested_tbl_1 SELECT 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), - named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), - map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)) + named_struct('f12', array(named_struct('f13', 'foo', 'f14', named_struct('f15', 14)), named_struct('f13', 'bar', 'f14', named_struct('f15', 28)))), + map('key1', named_struct('f16', array(named_struct('f17', named_struct('f18', 1)))), + 'key2', named_struct('f16', array(named_struct('f17', named_struct('f18', 2))))) FROM dummy POSTHOOK: type: QUERY POSTHOOK: Input: default@dummy @@ -81,8 +83,9 @@ POSTHOOK: Output: default@nested_tbl_2 PREHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), - named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), - map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)) + named_struct('f12', array(named_struct('f13', 'bar', 'f14', named_struct('f15', 28)), named_struct('f13', 'foo', 'f14', named_struct('f15', 56)))), + map('key3', named_struct('f16', array(named_struct('f17', named_struct('f18', 3)))), + 'key4', named_struct('f16', array(named_struct('f17', named_struct('f18', 4))))) FROM dummy PREHOOK: type: QUERY PREHOOK: Input: default@dummy @@ -90,8 +93,9 @@ PREHOOK: Output: default@nested_tbl_2 POSTHOOK: query: INSERT INTO TABLE nested_tbl_2 SELECT 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), - named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), - map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)) + named_struct('f12', array(named_struct('f13', 'bar', 'f14', named_struct('f15', 28)), named_struct('f13', 'foo', 'f14', named_struct('f15', 56)))), + map('key3', named_struct('f16', array(named_struct('f17', named_struct('f18', 3)))), + 'key4', named_struct('f16', array(named_struct('f17', named_struct('f18', 4))))) FROM dummy POSTHOOK: type: QUERY POSTHOOK: Input: default@dummy @@ -537,7 +541,7 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s3 (type: struct>>) + expressions: s3 (type: struct>>>) outputColumnNames: s3 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Lateral View Join Operator @@ -553,7 +557,7 @@ STAGE PLANS: outputColumnNames: _col8, _col9 Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: struct) + expressions: _col8 (type: int), _col9 (type: struct>) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -564,7 +568,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: _col3.f12 (type: array>) + expressions: _col3.f12 (type: array>>) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE UDTF Operator @@ -574,7 +578,7 @@ STAGE PLANS: outputColumnNames: _col8, _col9 Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: struct) + expressions: _col8 (type: int), _col9 (type: struct>) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -604,7 +608,7 @@ STAGE PLANS: outputColumnNames: _col8, _col9 Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: struct) + expressions: _col8 (type: int), _col9 (type: struct>) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -615,7 +619,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator - expressions: _col3.f12 (type: array>) + expressions: _col3.f12 (type: array>>) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE UDTF Operator @@ -625,7 +629,7 @@ STAGE PLANS: outputColumnNames: _col8, _col9 Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: struct) + expressions: _col8 (type: int), _col9 (type: struct>) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 20 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -654,10 +658,10 @@ LATERAL VIEW explode(s3.f12) tbl2 AS col2 POSTHOOK: type: QUERY POSTHOOK: Input: default@nested_tbl_1 #### A masked pattern was here #### -10 {"f13":"foo","f14":14} -10 {"f13":"bar","f14":28} -11 {"f13":"foo","f14":14} -11 {"f13":"bar","f14":28} +10 {"f13":"foo","f14":{"f15":14}} +10 {"f13":"bar","f14":{"f15":28}} +11 {"f13":"foo","f14":{"f15":14}} +11 {"f13":"bar","f14":{"f15":28}} PREHOOK: query: -- Testing UDFs EXPLAIN SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1 PREHOOK: type: QUERY @@ -1143,16 +1147,16 @@ false foo 4 PREHOOK: query: -- Testing select struct field from elements in array or map EXPLAIN -SELECT count(s1.f6), s3.f12[0].f14 +SELECT count(s1.f6), s3.f12[0].f14.f15 FROM nested_tbl_1 -GROUP BY s3.f12[0].f14 +GROUP BY s3.f12[0].f14.f15 PREHOOK: type: QUERY POSTHOOK: query: -- Testing select struct field from elements in array or map EXPLAIN -SELECT count(s1.f6), s3.f12[0].f14 +SELECT count(s1.f6), s3.f12[0].f14.f15 FROM nested_tbl_1 -GROUP BY s3.f12[0].f14 +GROUP BY s3.f12[0].f14.f15 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1167,7 +1171,7 @@ STAGE PLANS: Pruned Column Paths: s3.f12, s1.f6 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s3.f12[0].f14 (type: int), s1.f6 (type: int) + expressions: s3.f12[0].f14.f15 (type: int), s1.f6 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1207,28 +1211,28 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT count(s1.f6), s3.f12[0].f14 +PREHOOK: query: SELECT count(s1.f6), s3.f12[0].f14.f15 FROM nested_tbl_1 -GROUP BY s3.f12[0].f14 +GROUP BY s3.f12[0].f14.f15 PREHOOK: type: QUERY PREHOOK: Input: default@nested_tbl_1 #### A masked pattern was here #### -POSTHOOK: query: SELECT count(s1.f6), s3.f12[0].f14 +POSTHOOK: query: SELECT count(s1.f6), s3.f12[0].f14.f15 FROM nested_tbl_1 -GROUP BY s3.f12[0].f14 +GROUP BY s3.f12[0].f14.f15 POSTHOOK: type: QUERY POSTHOOK: Input: default@nested_tbl_1 #### A masked pattern was here #### 1 14 PREHOOK: query: EXPLAIN -SELECT count(s1.f6), s4['key1'].f15 +SELECT count(s1.f6), s4['key1'].f16[0].f17.f18 FROM nested_tbl_1 -GROUP BY s4['key1'].f15 +GROUP BY s4['key1'].f16[0].f17.f18 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN -SELECT count(s1.f6), s4['key1'].f15 +SELECT count(s1.f6), s4['key1'].f16[0].f17.f18 FROM nested_tbl_1 -GROUP BY s4['key1'].f15 +GROUP BY s4['key1'].f16[0].f17.f18 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1243,7 +1247,7 @@ STAGE PLANS: Pruned Column Paths: s1.f6 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s4['key1'].f15 (type: int), s1.f6 (type: int) + expressions: s4['key1'].f16[0].f17.f18 (type: int), s1.f6 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1283,15 +1287,15 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT count(s1.f6), s4['key1'].f15 +PREHOOK: query: SELECT count(s1.f6), s4['key1'].f16[0].f17.f18 FROM nested_tbl_1 -GROUP BY s4['key1'].f15 +GROUP BY s4['key1'].f16[0].f17.f18 PREHOOK: type: QUERY PREHOOK: Input: default@nested_tbl_1 #### A masked pattern was here #### -POSTHOOK: query: SELECT count(s1.f6), s4['key1'].f15 +POSTHOOK: query: SELECT count(s1.f6), s4['key1'].f16[0].f17.f18 FROM nested_tbl_1 -GROUP BY s4['key1'].f15 +GROUP BY s4['key1'].f16[0].f17.f18 POSTHOOK: type: QUERY POSTHOOK: Input: default@nested_tbl_1 #### A masked pattern was here ####