diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index cfedf35..7fbbb65 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -648,8 +648,13 @@ protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass if (op instanceof TableScanOperator) { TableScanOperator ts = (TableScanOperator) op; // push down projections. - ColumnProjectionUtils.appendReadColumns( - jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns()); + // Stats gathering (ANALYZE) on an RCFile table (or a subclass of its input format) with no needed columns: skip projection push-down so all columns are read + if (!(ts.getConf().isGatherStats() + && RCFileInputFormat.class.isAssignableFrom(inputFormatClass) + && ts.getNeededColumnIDs().isEmpty())) { + ColumnProjectionUtils.appendReadColumns( + jobConf, ts.getNeededColumnIDs(), ts.getNeededColumns()); + } // push down filters pushFilters(jobConf, ts); diff --git ql/src/test/queries/clientpositive/analyze_rcfile.q ql/src/test/queries/clientpositive/analyze_rcfile.q index e69de29..527162f 100644 --- ql/src/test/queries/clientpositive/analyze_rcfile.q +++ ql/src/test/queries/clientpositive/analyze_rcfile.q @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; + +CREATE TABLE test1(name string, age int); +CREATE TABLE test2(name string, age int) stored as rcfile; + +LOAD DATA LOCAL INPATH '../../data/files/test1.txt' INTO TABLE test1; +FROM test1 INSERT OVERWRITE TABLE test2 SELECT test1.name, test1.age; + +ANALYZE TABLE test2 COMPUTE STATISTICS; + + +DESC FORMATTED test2; + +-- Another way to show stats. +EXPLAIN EXTENDED select * from test2; + +ANALYZE TABLE test2 COMPUTE STATISTICS partialscan; + + +DESC FORMATTED test2; + +-- Another way to show stats. 
+EXPLAIN EXTENDED select * from test2; + +DROP TABLE test1; +DROP TABLE test2; diff --git ql/src/test/results/clientpositive/analyze_rcfile.q.out ql/src/test/results/clientpositive/analyze_rcfile.q.out index e69de29..d21f637 100644 --- ql/src/test/results/clientpositive/analyze_rcfile.q.out +++ ql/src/test/results/clientpositive/analyze_rcfile.q.out @@ -0,0 +1,226 @@ +PREHOOK: query: DROP TABLE IF EXISTS test1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS test2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test1(name string, age int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: CREATE TABLE test1(name string, age int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: CREATE TABLE test2(name string, age int) stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2 +POSTHOOK: query: CREATE TABLE test2(name string, age int) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/test1.txt' INTO TABLE test1 +PREHOOK: type: LOADLOCAL +#### A masked pattern was here #### +PREHOOK: Output: default@test1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/test1.txt' INTO TABLE test1 +POSTHOOK: type: LOADLOCAL +#### A masked pattern was here #### +POSTHOOK: Output: default@test1 +PREHOOK: query: FROM test1 INSERT OVERWRITE TABLE test2 SELECT test1.name, test1.age +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Output: default@test2 +POSTHOOK: query: FROM test1 INSERT OVERWRITE TABLE test2 SELECT test1.name, test1.age +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: 
Output: default@test2 +POSTHOOK: Lineage: test2.age SIMPLE [(test1)test1.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: test2.name SIMPLE [(test1)test1.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: ANALYZE TABLE test2 COMPUTE STATISTICS +PREHOOK: type: QUERY +PREHOOK: Input: default@test2 +PREHOOK: Output: default@test2 +POSTHOOK: query: ANALYZE TABLE test2 COMPUTE STATISTICS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2 +POSTHOOK: Output: default@test2 +PREHOOK: query: DESC FORMATTED test2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test2 +POSTHOOK: query: DESC FORMATTED test2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test2 +# col_name data_type comment + +name string +age int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 5 + rawDataSize 21 + totalSize 103 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Another way to show stats. +EXPLAIN EXTENDED select * from test2 +PREHOOK: type: QUERY +POSTHOOK: query: -- Another way to show stats. 
+EXPLAIN EXTENDED select * from test2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + test2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: test2 + Statistics: Num rows: 5 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: name (type: string), age (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 21 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: ANALYZE TABLE test2 COMPUTE STATISTICS partialscan +PREHOOK: type: QUERY +PREHOOK: Input: default@test2 +PREHOOK: Output: default@test2 +POSTHOOK: query: ANALYZE TABLE test2 COMPUTE STATISTICS partialscan +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2 +POSTHOOK: Output: default@test2 +PREHOOK: query: DESC FORMATTED test2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test2 +POSTHOOK: query: DESC FORMATTED test2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test2 +# col_name data_type comment + +name string +age int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 5 + rawDataSize 21 + totalSize 103 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Another way to show stats. 
+EXPLAIN EXTENDED select * from test2 +PREHOOK: type: QUERY +POSTHOOK: query: -- Another way to show stats. +EXPLAIN EXTENDED select * from test2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + test2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: test2 + Statistics: Num rows: 5 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: name (type: string), age (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 21 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: DROP TABLE test1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test1 +PREHOOK: Output: default@test1 +POSTHOOK: query: DROP TABLE test1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test1 +POSTHOOK: Output: default@test1 +PREHOOK: query: DROP TABLE test2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test2 +PREHOOK: Output: default@test2 +POSTHOOK: query: DROP TABLE test2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test2 +POSTHOOK: Output: default@test2