diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 98280c52fe..34d0e27ce6 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -425,7 +425,8 @@ minillap.query.files=acid_bucket_pruning.q,\ load_fs2.q,\ llap_stats.q,\ multi_count_distinct_null.q,\ - cttl.q + cttl.q,\ + vector_offset_limit.q minillaplocal.query.files=\ bucket_num_reducers_acid.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java index 918a69a9b6..79b073f93a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -66,7 +66,9 @@ public VectorizationContext getInputVectorizationContext() { public void process(Object row, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) row; - if (currCount + batch.size < offset) { + // We should skip a number of rows equal to the offset value: + // skip whole batches while the sum of the current read count and the current batch size is less than or equal to the offset value + if (currCount + batch.size <= offset) { currCount += batch.size; } else if (currCount >= offset + limit) { setDone(true); diff --git a/ql/src/test/queries/clientpositive/vector_offset_limit.q b/ql/src/test/queries/clientpositive/vector_offset_limit.q new file mode 100644 index 0000000000..dd6a3e7432 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_offset_limit.q @@ -0,0 +1,34 @@ +set hive.auto.convert.join=true; +set hive.auto.convert.sortmerge.join=true; +set hive.default.rcfile.serde=org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe; +set hive.enforce.sortmergebucketmapjoin=true; +set hive.exec.reducers.bytes.per.reducer=67108864; +set 
hive.fetch.output.serde=org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +set hive.limit.optimize.enable=true; +set hive.limit.pushdown.memory.usage=0.04; +set hive.llap.io.enabled=true; +set hive.map.aggr.hash.min.reduction=0.99; +set hive.mapjoin.bucket.cache.size=10000; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.merge.mapfiles=false; +set hive.merge.nway.joins=false; +set hive.optimize.bucketmapjoin=true; +set hive.optimize.index.filter=true; +set hive.stats.fetch.bitvector=false; +set hive.stats.fetch.column.stats=true; +set hive.support.quoted.identifiers=none; +set hive.tez.auto.reducer.parallelism=true; +set hive.tez.bucket.pruning=true; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.minmax.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true; +set hive.vectorized.groupby.checkinterval=4096; + +drop table if exists TLIMITOFFSET; +create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc; +create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE; +INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE; + +SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20; +SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10; diff --git a/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out b/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out new file mode 100644 index 0000000000..06c9682ad8 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out @@ -0,0 +1,95 @@ +PREHOOK: query: drop table if exists TLIMITOFFSET +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TLIMITOFFSET +POSTHOOK: type: DROPTABLE +PREHOOK: 
query: create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TLIMITOFFSET +POSTHOOK: query: create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TLIMITOFFSET +PREHOOK: query: create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TLIMITOFFSETSTAGE +POSTHOOK: query: create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TLIMITOFFSETSTAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tlimitoffsetstage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tlimitoffsetstage +PREHOOK: query: INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tlimitoffsetstage +PREHOOK: Output: default@tlimitoffset +POSTHOOK: query: INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tlimitoffsetstage +POSTHOOK: Output: default@tlimitoffset +POSTHOOK: Lineage: tlimitoffset.flag SIMPLE [(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:flag, type:string, comment:null), ] +POSTHOOK: Lineage: tlimitoffset.id SIMPLE 
[(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: tlimitoffset.name SIMPLE [(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@tlimitoffset +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tlimitoffset +POSTHOOK: Output: hdfs://### HDFS PATH ### +testname1 +testname2 +testname3 +testname4 +testname5 +testname6 +testname7 +testname8 +testname9 +testname10 +testname11 +testname12 +testname13 +testname14 +testname15 +testname16 +testname17 +testname18 +testname19 +testname20 +PREHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@tlimitoffset +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tlimitoffset +POSTHOOK: Output: hdfs://### HDFS PATH ### +testname11 +testname12 +testname13 +testname14 +testname15 +testname16 +testname17 +testname18 +testname19 +testname20 +testname21 +testname22 +testname23 +testname24 +testname25 +testname26 +testname27 +testname28 +testname29 +testname30 diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out index c6c39ed28e..e6a6a9475b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out @@ -827,7 +827,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@over10k_n21 #### A masked pattern was here #### ts s sum_window_0 
-2013-03-01 09:11:58.70307 calvin laertes 197097 2013-03-01 09:11:58.70307 calvin steinbeck 262874 2013-03-01 09:11:58.70307 david falkner 328506 2013-03-01 09:11:58.70307 fred nixon 394118 @@ -837,6 +836,7 @@ ts s sum_window_0 2013-03-01 09:11:58.70307 jessica laertes 656771 2013-03-01 09:11:58.70307 jessica polk 722558 2013-03-01 09:11:58.70307 katie king 788310 +2013-03-01 09:11:58.70307 katie white 853920 PREHOOK: query: explain vectorization detail select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k_n21 limit 5 PREHOOK: type: QUERY