diff --git ql/src/test/queries/clientpositive/analyze_rcfile.q ql/src/test/queries/clientpositive/analyze_rcfile.q
index e69de29..527162f 100644
--- ql/src/test/queries/clientpositive/analyze_rcfile.q
+++ ql/src/test/queries/clientpositive/analyze_rcfile.q
@@ -0,0 +1,27 @@
+DROP TABLE IF EXISTS test1;
+DROP TABLE IF EXISTS test2;
+
+CREATE TABLE test1(name string, age int);
+CREATE TABLE test2(name string, age int) stored as rcfile;
+
+LOAD DATA LOCAL INPATH '../../data/files/test1.txt' INTO TABLE test1;
+FROM test1 INSERT OVERWRITE TABLE test2 SELECT test1.name, test1.age;
+
+ANALYZE TABLE test2 COMPUTE STATISTICS;
+
+
+DESC FORMATTED test2;
+
+-- Another way to show stats.
+EXPLAIN EXTENDED select * from test2;
+
+ANALYZE TABLE test2 COMPUTE STATISTICS partialscan;
+
+
+DESC FORMATTED test2;
+
+-- Another way to show stats.
+EXPLAIN EXTENDED select * from test2;
+
+DROP TABLE test1;
+DROP TABLE test2;
diff --git ql/src/test/results/clientpositive/analyze_rcfile.q.out ql/src/test/results/clientpositive/analyze_rcfile.q.out
index e69de29..d21f637 100644
--- ql/src/test/results/clientpositive/analyze_rcfile.q.out
+++ ql/src/test/results/clientpositive/analyze_rcfile.q.out
@@ -0,0 +1,226 @@
+PREHOOK: query: DROP TABLE IF EXISTS test1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS test2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test1(name string, age int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1(name string, age int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: CREATE TABLE test2(name string, age int) stored as rcfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2(name string, age int) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/test1.txt' INTO TABLE test1
+PREHOOK: type: LOADLOCAL
+#### A masked pattern was here ####
+PREHOOK: Output: default@test1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/test1.txt' INTO TABLE test1
+POSTHOOK: type: LOADLOCAL
+#### A masked pattern was here ####
+POSTHOOK: Output: default@test1
+PREHOOK: query: FROM test1 INSERT OVERWRITE TABLE test2 SELECT test1.name, test1.age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Output: default@test2
+POSTHOOK: query: FROM test1 INSERT OVERWRITE TABLE test2 SELECT test1.name, test1.age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Output: default@test2
+POSTHOOK: Lineage: test2.age SIMPLE [(test1)test1.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: test2.name SIMPLE [(test1)test1.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: ANALYZE TABLE test2 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2
+PREHOOK: Output: default@test2
+POSTHOOK: query: ANALYZE TABLE test2 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2
+POSTHOOK: Output: default@test2
+PREHOOK: query: DESC FORMATTED test2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test2
+POSTHOOK: query: DESC FORMATTED test2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test2
+# col_name              data_type               comment
+
+name                    string
+age                     int
+
+# Detailed Table Information
+Database:               default
+#### A masked pattern was here ####
+Protect Mode:           None
+Retention:              0
+#### A masked pattern was here ####
+Table Type:             MANAGED_TABLE
+Table Parameters:
+        COLUMN_STATS_ACCURATE   true
+        numFiles                1
+        numRows                 5
+        rawDataSize             21
+        totalSize               103
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:          org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat:            org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat:           org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed:             No
+Num Buckets:            -1
+Bucket Columns:         []
+Sort Columns:           []
+Storage Desc Params:
+        serialization.format    1
+PREHOOK: query: -- Another way to show stats.
+EXPLAIN EXTENDED select * from test2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Another way to show stats.
+EXPLAIN EXTENDED select * from test2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            test2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: test2
+          Statistics: Num rows: 5 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+          GatherStats: false
+          Select Operator
+            expressions: name (type: string), age (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 5 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: ANALYZE TABLE test2 COMPUTE STATISTICS partialscan
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2
+PREHOOK: Output: default@test2
+POSTHOOK: query: ANALYZE TABLE test2 COMPUTE STATISTICS partialscan
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2
+POSTHOOK: Output: default@test2
+PREHOOK: query: DESC FORMATTED test2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test2
+POSTHOOK: query: DESC FORMATTED test2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test2
+# col_name              data_type               comment
+
+name                    string
+age                     int
+
+# Detailed Table Information
+Database:               default
+#### A masked pattern was here ####
+Protect Mode:           None
+Retention:              0
+#### A masked pattern was here ####
+Table Type:             MANAGED_TABLE
+Table Parameters:
+        COLUMN_STATS_ACCURATE   true
+        numFiles                1
+        numRows                 5
+        rawDataSize             21
+        totalSize               103
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:          org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat:            org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat:           org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed:             No
+Num Buckets:            -1
+Bucket Columns:         []
+Sort Columns:           []
+Storage Desc Params:
+        serialization.format    1
+PREHOOK: query: -- Another way to show stats.
+EXPLAIN EXTENDED select * from test2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Another way to show stats.
+EXPLAIN EXTENDED select * from test2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            test2
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: test2
+          Statistics: Num rows: 5 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+          GatherStats: false
+          Select Operator
+            expressions: name (type: string), age (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 5 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: DROP TABLE test1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test1
+PREHOOK: Output: default@test1
+POSTHOOK: query: DROP TABLE test1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test1
+POSTHOOK: Output: default@test1
+PREHOOK: query: DROP TABLE test2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test2
+PREHOOK: Output: default@test2
+POSTHOOK: query: DROP TABLE test2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test2
+POSTHOOK: Output: default@test2
diff --git serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
index 0c7ac30..df6dd8b 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
@@ -108,8 +108,10 @@ public static void appendReadColumns(Configuration conf, List<Integer> ids) {
       newConfStr = newConfStr + StringUtils.COMMA_STR + old;
     }
     setReadColumnIDConf(conf, newConfStr);
-    // Set READ_ALL_COLUMNS to false
-    conf.setBoolean(READ_ALL_COLUMNS, false);
+    if (ids.size() > 0) {
+      // Set READ_ALL_COLUMNS to false
+      conf.setBoolean(READ_ALL_COLUMNS, false);
+    }
   }
 
   /**
diff --git serde/src/test/org/apache/hadoop/hive/serde2/TestColumnProjectionUtils.java serde/src/test/org/apache/hadoop/hive/serde2/TestColumnProjectionUtils.java
index 2b81b54..8215b3f 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/TestColumnProjectionUtils.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/TestColumnProjectionUtils.java
@@ -66,10 +66,11 @@ public void testReadColumnIds() {
     actual = ColumnProjectionUtils.getReadColumnIDs(conf);
     assertEquals(Collections.EMPTY_LIST, actual);
     // test that setting read column ids set read all columns to false
-    assertFalse(ColumnProjectionUtils.isReadAllColumns(conf));
+    assertTrue(ColumnProjectionUtils.isReadAllColumns(conf));
     // add needed columns
     columnIds.add(1);
     ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
+    assertFalse(ColumnProjectionUtils.isReadAllColumns(conf));
     assertEquals(columnIds, ColumnProjectionUtils.getReadColumnIDs(conf));
     columnIds.add(2);
     ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(2));
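Reviewer note: a minimal sketch, not part of the patch, of the contract the ColumnProjectionUtils change is intended to enforce. Appending an empty projection list should no longer flip READ_ALL_COLUMNS to false, while appending a concrete column id still should; the new analyze_rcfile.q test exercises this indirectly via ANALYZE TABLE on an RCFile table. The sketch uses only the calls exercised in TestColumnProjectionUtils above; the class name ReadAllColumnsSketch and the standalone main() are illustrative, not part of Hive.

    import java.util.Arrays;
    import java.util.Collections;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

    public class ReadAllColumnsSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Appending an empty id list leaves the conf in "read all columns" mode
        // after this patch (previously it switched to projected reads with no columns).
        ColumnProjectionUtils.appendReadColumns(conf, Collections.<Integer>emptyList());
        System.out.println(ColumnProjectionUtils.isReadAllColumns(conf));   // expected: true

        // Appending a concrete column id switches the conf to projected reads.
        ColumnProjectionUtils.appendReadColumns(conf, Arrays.asList(1));
        System.out.println(ColumnProjectionUtils.isReadAllColumns(conf));   // expected: false
        System.out.println(ColumnProjectionUtils.getReadColumnIDs(conf));   // expected: [1]
      }
    }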