diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java index d47d3c202b9308b8e63571ba1fffc25d5b7b1dde..57582824d559e6e4d276f0ca37b7f7967accf4da 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java @@ -119,7 +119,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty()); boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null) || (desc.getVirtualCols().isEmpty()); - if (noColNeeded && noVCneeded) { + boolean isSkipHF = desc.isNeedSkipHeaderFooters(); + if (noColNeeded && noVCneeded && !isSkipHF) { walkerCtx.setMayBeMetadataOnly(tsOp); } return nd; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index 5381247f43201fbbc5ef79e57940edeabb12d368..8cf261db55fc7ec4ed55846ba01a9d7f38faf3ab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.TableSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.serde.serdeConstants; /** @@ -364,4 +365,19 @@ public int getNumBuckets() { public void setNumBuckets(int numBuckets) { this.numBuckets = numBuckets; } + + public boolean isNeedSkipHeaderFooters() { + boolean rtn = false; + if (tableMetadata != null && tableMetadata.getTTable() != null) { + Map params = tableMetadata.getTTable().getParameters(); + if (params != null) { + String skipHVal = params.get(serdeConstants.HEADER_COUNT); + int hcount = skipHVal == null? 0 : Integer.parseInt(skipHVal); + String skipFVal = params.get(serdeConstants.FOOTER_COUNT); + int fcount = skipFVal == null? 0 : Integer.parseInt(skipFVal); + rtn = (hcount != 0 || fcount !=0 ); + } + } + return rtn; + } } diff --git a/ql/src/test/queries/clientpositive/skiphf_aggr.q b/ql/src/test/queries/clientpositive/skiphf_aggr.q new file mode 100644 index 0000000000000000000000000000000000000000..fcd0b35a03702f759af818136b20c3cc2481333d --- /dev/null +++ b/ql/src/test/queries/clientpositive/skiphf_aggr.q @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS skipHTbl; + +CREATE TABLE skipHTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.header.line.count'='1'); + +INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4); +INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4); + +SELECT * FROM skipHTbl; + +SELECT DISTINCT b FROM skipHTbl; +SELECT MAX(b) FROM skipHTbl; +SELECT DISTINCT a FROM skipHTbl; + +INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1); +INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4); + +SELECT DISTINCT b FROM skipHTbl; +SELECT MIN(b) FROM skipHTbl; +SELECT DISTINCT a FROM skipHTbl; + +DROP TABLE IF EXISTS skipFTbl; + +CREATE TABLE skipFTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.footer.line.count'='1'); + +INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4); +INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4); + +SELECT * FROM skipFTbl; + +SELECT DISTINCT b FROM skipFTbl; +SELECT MAX(b) FROM skipFTbl; +SELECT DISTINCT a FROM skipFTbl; + +DROP TABLE skipHTbl; +DROP TABLE skipFTbl; + diff --git a/ql/src/test/results/clientpositive/skiphf_aggr.q.out b/ql/src/test/results/clientpositive/skiphf_aggr.q.out new file mode 100644 index 0000000000000000000000000000000000000000..aeb4b1be33906d8b2ee6bb5e13c4dc6f38956e2d --- /dev/null +++ b/ql/src/test/results/clientpositive/skiphf_aggr.q.out @@ -0,0 +1,267 @@ +PREHOOK: query: DROP TABLE IF EXISTS skipHTbl +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS skipHTbl +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE skipHTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.header.line.count'='1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skipHTbl +POSTHOOK: query: CREATE TABLE skipHTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.header.line.count'='1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skipHTbl +PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@skiphtbl@b=1 +POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@skiphtbl@b=1 +POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@skiphtbl@b=2 +POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@skiphtbl@b=2 +POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: SELECT * FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 1 +3 1 +4 1 +2 2 +3 2 +4 2 +PREHOOK: query: SELECT DISTINCT b FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +1 +2 +PREHOOK: query: SELECT MAX(b) FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT MAX(b) FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT DISTINCT a FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +3 +4 +PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@skiphtbl@b=1 +POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@skiphtbl@b=1 +POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@skiphtbl@b=2 +POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@skiphtbl@b=2 +POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: SELECT DISTINCT b FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT MIN(b) FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(b) FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT DISTINCT a FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +3 +4 +PREHOOK: query: DROP TABLE IF EXISTS skipFTbl +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS skipFTbl +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE skipFTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.footer.line.count'='1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skipFTbl +POSTHOOK: query: CREATE TABLE skipFTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.footer.line.count'='1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skipFTbl +PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@skipftbl@b=1 +POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@skipftbl@b=1 +POSTHOOK: Lineage: skipftbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@skipftbl@b=2 +POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@skipftbl@b=2 +POSTHOOK: Lineage: skipftbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: SELECT * FROM skipFTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skipftbl +PREHOOK: Input: default@skipftbl@b=1 +PREHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM skipFTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skipftbl +POSTHOOK: Input: default@skipftbl@b=1 +POSTHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +1 2 +2 2 +3 2 +PREHOOK: query: SELECT DISTINCT b FROM skipFTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skipftbl +PREHOOK: Input: default@skipftbl@b=1 +PREHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT b FROM skipFTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skipftbl +POSTHOOK: Input: default@skipftbl@b=1 +POSTHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +1 +2 +PREHOOK: query: SELECT MAX(b) FROM skipFTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skipftbl +PREHOOK: Input: default@skipftbl@b=1 +PREHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT MAX(b) FROM skipFTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skipftbl +POSTHOOK: Input: default@skipftbl@b=1 +POSTHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT DISTINCT a FROM skipFTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skipftbl +PREHOOK: Input: default@skipftbl@b=1 +PREHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT a FROM skipFTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skipftbl +POSTHOOK: Input: default@skipftbl@b=1 +POSTHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +1 +2 +3 +PREHOOK: query: DROP TABLE skipHTbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skiphtbl +PREHOOK: Output: default@skiphtbl +POSTHOOK: query: DROP TABLE skipHTbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Output: default@skiphtbl +PREHOOK: query: DROP TABLE skipFTbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skipftbl +PREHOOK: Output: default@skipftbl +POSTHOOK: query: DROP TABLE skipFTbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skipftbl +POSTHOOK: Output: default@skipftbl