Index: conf/hive-default.xml.template
===================================================================
--- conf/hive-default.xml.template	(revision 1440586)
+++ conf/hive-default.xml.template	(working copy)
@@ -1138,6 +1138,14 @@
+<property>
+  <name>hive.stats.collect.scancols</name>
+  <value>false</value>
+  <description>Whether column accesses are tracked in the QueryPlan.
+  This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed.
+  </description>
+</property>
+
 <property>
   <name>hive.stats.ndv.error</name>
   <value>20.0</value>
   <description>Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost. A lower value for error indicates higher accuracy and a higher compute cost.</description>
 </property>
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1440586)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -566,6 +566,8 @@
     HIVE_STATS_RELIABLE("hive.stats.reliable", false),
     // Collect table access keys information for operators that can benefit from bucketing
     HIVE_STATS_COLLECT_TABLEKEYS("hive.stats.collect.tablekeys", false),
+    // Collect column access information
+    HIVE_STATS_COLLECT_SCANCOLS("hive.stats.collect.scancols", false),
     // standard error allowed for ndv estimates. A lower value indicates higher accuracy and a
     // higher compute cost.
     HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0),
Index: ql/src/test/results/clientpositive/column_access_stats.q.out
===================================================================
--- ql/src/test/results/clientpositive/column_access_stats.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/column_access_stats.q.out	(working copy)
@@ -0,0 +1,1077 @@
+PREHOOK: query: -- This test is used for testing the ColumnAccessAnalyzer
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: -- Simple select queries
+SELECT key FROM T1 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key
+
+1
+2
+3
+7
+8
+8
+PREHOOK: query: SELECT key, val FROM T1 ORDER BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key,val
+
+1	11
+2	12
+3	13
+7	17
+8	18
+8	28
+PREHOOK: query: SELECT 1 FROM T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+1
+1
+1
+1
+1
+1
+PREHOOK: query: -- More complicated select queries
+EXPLAIN SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        subq1:t1 
+
TableScan + alias: t1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Table:t1 +Columns:key + +1 +2 +3 +7 +8 +8 +PREHOOK: query: EXPLAIN SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL val) v)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq1:t1 + TableScan + alias: t1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Table:t1 +Columns:key + +1 +2 +3 +7 +8 +8 +PREHOOK: query: SELECT key + 1 as k FROM T1 ORDER BY k +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Table:t1 +Columns:key + +2.0 +3.0 +4.0 +8.0 +9.0 +9.0 +PREHOOK: query: SELECT key + val as k FROM T1 ORDER BY k +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Table:t1 +Columns:key,val + +12.0 +14.0 +16.0 +24.0 +26.0 +36.0 +PREHOOK: query: -- Work with union +EXPLAIN +SELECT * FROM ( +SELECT key as c FROM T1 + UNION ALL +SELECT val as c FROM T1 +) subq1 ORDER BY c +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val) c))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c))))) + +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:subq1-subquery1:t1 + TableScan + alias: t1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + null-subquery2:subq1-subquery2:t1 + TableScan + alias: t1 + Select Operator + expressions: + expr: val + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM ( +SELECT key as c FROM T1 + UNION ALL +SELECT val as c FROM T1 +) subq1 ORDER BY c +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Table:t1 +Columns:key,val + +1 +11 +12 +13 +17 +18 +2 +28 +3 +7 +8 +8 +PREHOOK: query: EXPLAIN +SELECT * FROM ( +SELECT key as c FROM T1 + UNION ALL +SELECT key as c FROM T1 +) subq1 ORDER BY c +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:subq1-subquery1:t1 + TableScan + alias: t1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + null-subquery2:subq1-subquery2:t1 + TableScan + alias: t1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM ( +SELECT key as c FROM T1 + UNION ALL +SELECT key as c FROM T1 +) subq1 ORDER BY c +PREHOOK: type: QUERY +PREHOOK: Input: 
default@t1 +#### A masked pattern was here #### +Table:t1 +Columns:key + +1 +1 +2 +2 +3 +3 +7 +7 +8 +8 +8 +8 +PREHOOK: query: -- Work with insert overwrite +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key +INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +Table:t1 +Columns:key,val + +PREHOOK: query: -- Simple joins +SELECT * +FROM T1 JOIN T2 +ON T1.key = T2.key +ORDER BY T1.key, T1.val, T2.key, T2.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Table:t2 +Columns:key,val + +Table:t1 +Columns:key,val + +1 11 1 1 +2 12 2 1 +3 13 3 1 +7 17 7 1 +8 18 8 2 +8 28 8 2 +PREHOOK: query: EXPLAIN +SELECT T1.key +FROM T1 JOIN T2 +ON T1.key = T2.key +ORDER BY T1.key +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1)) (TOK_TABREF (TOK_TABNAME T2)) (= (. (TOK_TABLE_OR_COL T1) key) (. (TOK_TABLE_OR_COL T2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL T1) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + t2 + TableScan + alias: t2 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT T1.key +FROM T1 JOIN T2 +ON T1.key = T2.key +ORDER BY T1.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Table:t2 +Columns:key + +Table:t1 +Columns:key + +1 +2 +3 +7 +8 +8 +PREHOOK: query: SELECT * +FROM T1 JOIN T2 +ON T1.key = T2.key AND T1.val = T2.val +ORDER BY T1.key, T1.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Table:t2 +Columns:key,val + +Table:t1 +Columns:key,val + +PREHOOK: query: -- Map join +SELECT /*+ MAPJOIN(a) */ * +FROM T1 a JOIN T2 b +ON a.key = b.key +ORDER BY a.key, a.val, b.key, b.val 
+PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Table:t2 +Columns:key,val + +Table:t1 +Columns:key,val + +1 11 1 1 +2 12 2 1 +3 13 3 1 +7 17 7 1 +8 18 8 2 +8 28 8 2 +PREHOOK: query: -- More joins +EXPLAIN +SELECT * +FROM T1 JOIN T2 +ON T1.key = T2.key AND T1.val = 3 and T2.val = 3 +ORDER BY T1.key, T1.val +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1)) (TOK_TABREF (TOK_TABNAME T2)) (and (AND (= (. (TOK_TABLE_OR_COL T1) key) (. (TOK_TABLE_OR_COL T2) key)) (= (. (TOK_TABLE_OR_COL T1) val) 3)) (= (. (TOK_TABLE_OR_COL T2) val) 3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) val))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + Filter Operator + predicate: + expr: (val = 3.0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: val + type: string + t2 + TableScan + alias: t2 + Filter Operator + predicate: + expr: (val = 3.0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + expr: val + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * +FROM T1 JOIN T2 +ON T1.key = T2.key AND T1.val = 3 and T2.val = 3 +ORDER BY T1.key, T1.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Table:t2 +Columns:key,val + +Table:t1 +Columns:key,val + +PREHOOK: query: EXPLAIN +SELECT subq1.val +FROM +( + SELECT val FROM T1 WHERE key = 5 +) subq1 +JOIN +( + SELECT val FROM T2 WHERE key = 6 +) subq2 +ON subq1.val = subq2.val +ORDER BY subq1.val +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + 
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 5)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) val) (. (TOK_TABLE_OR_COL subq2) val)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) val))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) val))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq1:t1 + TableScan + alias: t1 + Filter Operator + predicate: + expr: (key = 5.0) + type: boolean + Select Operator + expressions: + expr: val + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + subq2:t2 + TableScan + alias: t2 + Filter Operator + predicate: + expr: (key = 6.0) + type: boolean + Select Operator + expressions: + expr: val + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT subq1.val +FROM +( + SELECT val FROM T1 WHERE key = 5 +) subq1 +JOIN +( + SELECT val FROM T2 WHERE key = 6 +) subq2 +ON subq1.val = subq2.val +ORDER BY subq1.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Table:t2 +Columns:key,val + +Table:t1 +Columns:key,val + +PREHOOK: query: -- Join followed by join +EXPLAIN +SELECT * +FROM +( + SELECT subq1.key as key + FROM + ( + SELECT key, val FROM T1 + ) subq1 + JOIN + ( + SELECT key, 'teststring' as val FROM T2 + ) subq2 + ON subq1.key = subq2.key +) T4 +JOIN T3 +ON T3.key = T4.key +ORDER BY T3.key, T4.key +PREHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 'teststring' val)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key) key)))) T4) (TOK_TABREF (TOK_TABNAME T3)) (= (. (TOK_TABLE_OR_COL T3) key) (. (TOK_TABLE_OR_COL T4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T3) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T4) key))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + t4:subq1:t1 + TableScan + alias: t1 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + t4:subq2:t2 + TableScan + alias: t2 + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + t3 + TableScan + alias: t3 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + expr: val + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col1 + type: string + expr: _col0 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string 
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT *
+FROM
+(
+  SELECT subq1.key as key
+  FROM
+  (
+    SELECT key, val FROM T1
+  ) subq1
+  JOIN
+  (
+    SELECT key, 'teststring' as val FROM T2
+  ) subq2
+  ON subq1.key = subq2.key
+) T4
+JOIN T3
+ON T3.key = T4.key
+ORDER BY T3.key, T4.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+Table:t3
+Columns:key,val
+
+Table:t2
+Columns:key
+
+Table:t1
+Columns:key
+
+1	1	11.0
+2	2	12.0
+3	3	13.0
+7	7	17.0
+8	8	46.0
+8	8	46.0
Index: ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java	(revision 0)
+++ ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java	(working copy)
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
+import org.mortbay.log.Log;
+
+/*
+ * This hook is used for verifying the column access information
+ * that is generated and maintained in the QueryPlan object by the
+ * ColumnAccessAnalyzer. All the hook does is print out the columns
+ * accessed from each table as recorded in the ColumnAccessInfo
+ * in the QueryPlan.
+ */
+public class CheckColumnAccessHook implements ExecuteWithHookContext {
+
+  public void run(HookContext hookContext) {
+    Log.info("Running CheckColumnAccessHook");
+    HiveConf conf = hookContext.getConf();
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS) == false) {
+      return;
+    }
+
+    QueryPlan plan = hookContext.getQueryPlan();
+    if (plan == null) {
+      return;
+    }
+
+    ColumnAccessInfo columnAccessInfo = hookContext.getQueryPlan().getColumnAccessInfo();
+    if (columnAccessInfo == null) {
+      return;
+    }
+
+    LogHelper console = SessionState.getConsole();
+    Map<String, Set<String>> tableToColumnAccessMap =
+      columnAccessInfo.getTableToColumnAccessMap();
+
+    // We need a new map to ensure output is always produced in the same order.
+    // This makes tests that use this hook deterministic.
+    Map<String, String> outputOrderedMap = new HashMap<String, String>();
+
+    for (Map.Entry<String, Set<String>> tableAccess : tableToColumnAccessMap.entrySet()) {
+      StringBuilder perTableInfo = new StringBuilder();
+      perTableInfo.append("Table:").append(tableAccess.getKey()).append("\n");
+      // Sort columns to make output deterministic
+      String[] columns = new String[tableAccess.getValue().size()];
+      tableAccess.getValue().toArray(columns);
+      Arrays.sort(columns);
+      perTableInfo.append("Columns:").append(StringUtils.join(columns, ','))
+        .append("\n");
+      outputOrderedMap.put(tableAccess.getKey(), perTableInfo.toString());
+    }
+
+    for (String perOperatorInfo : outputOrderedMap.values()) {
+      console.printError(perOperatorInfo);
+    }
+  }
+}
Index: ql/src/test/queries/clientpositive/column_access_stats.q
===================================================================
--- ql/src/test/queries/clientpositive/column_access_stats.q	(revision 0)
+++ ql/src/test/queries/clientpositive/column_access_stats.q	(working copy)
@@ -0,0 +1,158 @@
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.CheckColumnAccessHook;
+SET hive.stats.collect.scancols=true;
+
+-- This test is used for testing the ColumnAccessAnalyzer
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE;
+
+-- Simple select queries
+SELECT key FROM T1 ORDER BY key;
+SELECT key, val FROM T1 ORDER BY key, val;
+SELECT 1 FROM T1;
+
+-- More complicated select queries
+EXPLAIN SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key;
+SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key;
+EXPLAIN SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k;
+SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k;
+SELECT key + 1 as k FROM T1 ORDER BY k;
+SELECT key + val as k FROM T1 ORDER BY k;
+
+-- Work with union
+EXPLAIN
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT val as c FROM T1
+) subq1 ORDER BY c;
+
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT val as c FROM T1
+) subq1 ORDER BY c;
+
+EXPLAIN
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT key as c FROM T1
+) subq1 ORDER BY c;
+
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT key as c FROM T1
+) subq1 ORDER BY c;
+
+-- Work with insert overwrite
+FROM T1
+INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key
+INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key;
+
+-- Simple joins
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key, T1.val, T2.key, T2.val;
+
+EXPLAIN
+SELECT T1.key
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key;
+
+SELECT T1.key
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key;
+
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = T2.val
+ORDER BY T1.key, T1.val;
+
+-- Map join
+SELECT /*+ MAPJOIN(a) */ *
+FROM T1 a JOIN T2 b
+ON a.key = b.key
+ORDER BY a.key, a.val, b.key, b.val;
+
+-- More joins
+EXPLAIN
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = 3 and T2.val = 3
+ORDER BY T1.key, T1.val;
+
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = 3 and T2.val = 3
+ORDER BY T1.key, T1.val;
+
+EXPLAIN
+SELECT subq1.val
+FROM
+(
+  SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+  SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.val = subq2.val
+ORDER BY subq1.val;
+
+SELECT subq1.val
+FROM
+(
+  SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+  SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.val = subq2.val
+ORDER BY subq1.val;
+
+-- Join followed by join
+EXPLAIN
+SELECT *
+FROM
+(
+  SELECT subq1.key as key
+  FROM
+  (
+    SELECT key, val FROM T1
+  ) subq1
+  JOIN
+  (
+    SELECT key, 'teststring' as val FROM T2
+  ) subq2
+  ON subq1.key = subq2.key
+) T4
+JOIN T3
+ON T3.key = T4.key
+ORDER BY T3.key, T4.key;
+
+SELECT *
+FROM
+(
+  SELECT subq1.key as key
+  FROM
+  (
+    SELECT key, val FROM T1
+  ) subq1
+  JOIN
+  (
+    SELECT key, 'teststring' as val FROM T2
+  ) subq2
+  ON subq1.key = subq2.key
+) T4
+JOIN T3
+ON T3.key = T4.key
+ORDER BY T3.key, T4.key;
Index: ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java	(revision 1440586)
+++ ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java	(working copy)
@@ -44,6 +44,7 @@
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
 import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReducerTimeStatsPerJob;
@@ -84,6 +85,7 @@
    */
   protected LineageInfo linfo;
   private TableAccessInfo tableAccessInfo;
+  private ColumnAccessInfo columnAccessInfo;
 
   private HashMap<String, String> idToTableNameMap;
 
@@ -113,6 +115,7 @@
     outputs = sem.getOutputs();
     linfo = sem.getLineageInfo();
     tableAccessInfo = sem.getTableAccessInfo();
+    columnAccessInfo = sem.getColumnAccessInfo();
     idToTableNameMap = new HashMap<String, String>(sem.getIdToTableNameMap());
 
     queryId = makeQueryId();
@@ -777,6 +780,25 @@
     this.tableAccessInfo = tableAccessInfo;
   }
 
+  /**
+   * Gets the column access information.
+   *
+   * @return ColumnAccessInfo associated with the query.
+   */
+  public ColumnAccessInfo getColumnAccessInfo() {
+    return columnAccessInfo;
+  }
+
+  /**
+   * Sets the column access information.
+   *
+   * @param columnAccessInfo The ColumnAccessInfo structure that is set immediately after
+   * the optimization phase.
+   */
+  public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) {
+    this.columnAccessInfo = columnAccessInfo;
+  }
+
   public QueryProperties getQueryProperties() {
     return queryProperties;
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java	(working copy)
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+public class ColumnAccessInfo {
+  /**
+   * Map of table name to names of accessed columns
+   */
+  private final Map<String, Set<String>> tableToColumnAccessMap;
+
+  public ColumnAccessInfo() {
+    tableToColumnAccessMap = new HashMap<String, Set<String>>();
+  }
+
+  public void add(String table, String col) {
+    Set<String> tableColumns = tableToColumnAccessMap.get(table);
+    if (tableColumns == null) {
+      tableColumns = new HashSet<String>();
+      tableToColumnAccessMap.put(table, tableColumns);
+    }
+    tableColumns.add(col);
+  }
+
+  public Map<String, Set<String>> getTableToColumnAccessMap() {
+    return tableToColumnAccessMap;
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 1440586)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -8473,6 +8473,13 @@
     optm.initialize(conf);
     pCtx = optm.optimize();
 
+    // Generate column access stats if required - wait until column pruning takes place
+    // during optimization
+    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS) == true) {
+      ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
+      setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess());
+    }
+
     // At this point we have the complete operator tree
     // from which we want to find the reduce operator
     genMapRedTasks(pCtx);
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java	(revision 1440586)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java	(working copy)
@@ -97,6 +97,7 @@
    */
   protected LineageInfo linfo;
   protected TableAccessInfo tableAccessInfo;
+  protected ColumnAccessInfo columnAccessInfo;
 
   protected static final String TEXTFILE_INPUT = TextInputFormat.class
       .getName();
@@ -830,6 +831,25 @@
     this.tableAccessInfo = tableAccessInfo;
   }
 
+  /**
+   * Gets the column access information.
+   *
+   * @return ColumnAccessInfo associated with the query.
+   */
+  public ColumnAccessInfo getColumnAccessInfo() {
+    return columnAccessInfo;
+  }
+
+  /**
+   * Sets the column access information.
+   *
+   * @param columnAccessInfo The ColumnAccessInfo structure that is set immediately after
+   * the optimization phase.
+   */
+  public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) {
+    this.columnAccessInfo = columnAccessInfo;
+  }
+
   protected HashMap<String, String> extractPartitionSpecs(Tree partspec)
       throws SemanticException {
     HashMap<String, String> partSpec = new LinkedHashMap<String, String>();
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java	(working copy)
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.metadata.Table;
+
+public class ColumnAccessAnalyzer {
+  private static final Log LOG = LogFactory.getLog(ColumnAccessAnalyzer.class.getName());
+  private final ParseContext pGraphContext;
+
+  public ColumnAccessAnalyzer() {
+    pGraphContext = null;
+  }
+
+  public ColumnAccessAnalyzer(ParseContext pactx) {
+    pGraphContext = pactx;
+  }
+
+  public ColumnAccessInfo analyzeColumnAccess() throws SemanticException {
+    ColumnAccessInfo columnAccessInfo = new ColumnAccessInfo();
+    Map<TableScanOperator, Table> topOps = pGraphContext.getTopToTable();
+    for (TableScanOperator op : topOps.keySet()) {
+      Table table = topOps.get(op);
+      String tableName = table.getTableName();
+      List<FieldSchema> tableCols = table.getAllCols();
+      for (int i : op.getNeededColumnIDs()) {
+        columnAccessInfo.add(tableName, tableCols.get(i).getName());
+      }
+    }
+    return columnAccessInfo;
+  }
+}
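Note: the sketch below is illustrative only and is not part of the patch. It exercises the ColumnAccessInfo API added above to show its merge semantics: repeated add() calls for one table accumulate into a single column set, and duplicate columns collapse because the per-table values are Sets. The demo class name is invented for illustration; it assumes only that ColumnAccessInfo is on the classpath.

```java
package org.apache.hadoop.hive.ql.parse;

import java.util.Map;
import java.util.Set;

// Hypothetical demo class -- not part of the patch.
public class ColumnAccessInfoDemo {
  public static void main(String[] args) {
    ColumnAccessInfo info = new ColumnAccessInfo();
    info.add("t1", "key");
    info.add("t1", "val");
    info.add("t1", "key"); // duplicate, absorbed by the backing HashSet
    info.add("t2", "key");

    // Two entries, e.g. "t1 -> [key, val]" and "t2 -> [key]".
    // Set iteration order is unspecified, which is why
    // CheckColumnAccessHook sorts the columns before printing.
    Map<String, Set<String>> accessMap = info.getTableToColumnAccessMap();
    for (Map.Entry<String, Set<String>> entry : accessMap.entrySet()) {
      System.out.println(entry.getKey() + " -> " + entry.getValue());
    }
  }
}
```

To collect this information for real queries, set hive.stats.collect.scancols=true and register a post-execution hook such as the CheckColumnAccessHook added by this patch, as done at the top of column_access_stats.q.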