diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java index 2f3d735..925ab5c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -345,6 +346,13 @@ public RecordReader rows(long offset, long length, boolean[] include public RecordReader rows(long offset, long length, boolean[] include, SearchArgument sarg, String[] columnNames ) throws IOException { + + // if included columns is null, then include all columns + if (include == null && columnNames != null) { + include = new boolean[columnNames.length]; + Arrays.fill(include, true); + } + return new RecordReaderImpl(this.getStripes(), fileSystem, path, offset, length, footer.getTypesList(), codec, bufferSize, include, footer.getRowIndexStride(), sarg, columnNames); diff --git ql/src/test/queries/clientpositive/orc_predicate_pushdown.q ql/src/test/queries/clientpositive/orc_predicate_pushdown.q index df89802..a55448b 100644 --- ql/src/test/queries/clientpositive/orc_predicate_pushdown.q +++ ql/src/test/queries/clientpositive/orc_predicate_pushdown.q @@ -49,6 +49,10 @@ SET hive.optimize.index.filter=false; -- hive.optimize.index.filter is set to true. the explain plan should show filter expression -- in table scan operator. +SELECT * FROM orc_pred WHERE t<100 limit 1; +SET hive.optimize.index.filter=true; +SELECT * FROM orc_pred WHERE t<100 limit 1; + SELECT SUM(HASH(t)) FROM orc_pred WHERE t IS NOT NULL AND t < 0 diff --git ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out index ba7cf1a..3daea92 100644 --- ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out @@ -277,10 +277,7 @@ PREHOOK: query: -- all the following queries have predicates which are pushed do -- hive.optimize.index.filter is set to true. the explain plan should show filter expression -- in table scan operator. -SELECT SUM(HASH(t)) FROM orc_pred - WHERE t IS NOT NULL - AND t < 0 - AND t > -2 +SELECT * FROM orc_pred WHERE t<100 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@orc_pred #### A masked pattern was here #### @@ -288,7 +285,50 @@ POSTHOOK: query: -- all the following queries have predicates which are pushed d -- hive.optimize.index.filter is set to true. the explain plan should show filter expression -- in table scan operator. -SELECT SUM(HASH(t)) FROM orc_pred +SELECT * FROM orc_pred WHERE t<100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_pred +#### A masked pattern was here #### +POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal, comment:null), ] +POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +19 442 65553 4294967380 26.43 37.77 true alice zipper 2013-03-01 09:11:58.703217 29.62 history +PREHOOK: query: SELECT * FROM orc_pred WHERE t<100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_pred +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM orc_pred WHERE t<100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_pred +#### A masked pattern was here #### +POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal, comment:null), ] +POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +19 442 65553 4294967380 26.43 37.77 true alice zipper 2013-03-01 09:11:58.703217 29.62 history +PREHOOK: query: SELECT SUM(HASH(t)) FROM orc_pred + WHERE t IS NOT NULL + AND t < 0 + AND t > -2 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_pred +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(t)) FROM orc_pred WHERE t IS NOT NULL AND t < 0 AND t > -2