diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index 8ab5395..54f2644 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; @@ -121,6 +122,7 @@ public OpTuple(Class descClass, Class> opClass) { VectorReduceSinkOperator.class)); vectorOpvec.add(new OpTuple(FileSinkDesc.class, VectorFileSinkOperator.class)); vectorOpvec.add(new OpTuple(FilterDesc.class, VectorFilterOperator.class)); + vectorOpvec.add(new OpTuple(LimitDesc.class, VectorLimitOperator.class)); } public static Operator getVectorOperator(T conf, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java new file mode 100644 index 0000000..4e47f35 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +/** + * Vectorized limit operator: truncates row batches so at most {@code limit} rows are passed on. + **/ +public class VectorLimitOperator extends LimitOperator { + + private static final long serialVersionUID = 1L; + + public VectorLimitOperator() { + super(); + } + + public VectorLimitOperator(VectorizationContext vContext, OperatorDesc conf) { + this.conf = (LimitDesc) conf; + } + + @Override + public void processOp(Object row, int tag) throws HiveException { + VectorizedRowBatch batch = (VectorizedRowBatch) row; + + if (currCount < limit) { /* quota left: clamp the batch to the rows still allowed, forward it, then count them */ + batch.size = Math.min(batch.size, limit - currCount); + forward(row, inputObjInspectors[tag]); + currCount += batch.size; + } else { /* currCount has reached limit: forward nothing more */ + setDone(true); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index df1c5a6..b8825d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -402,6 +402,7 @@ private boolean validateOperator(Operator op) { break; case FILESINK: case TABLESCAN: + case LIMIT: ret = true; break; default: @@ -528,6 +529,7 @@ private VectorizationContext getVectorizationContext(Operator 0 limit 7; +SELECT cbigint, cdouble 
FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7; diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out new file mode 100644 index 0000000..d7a04e8 --- /dev/null +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -0,0 +1,64 @@ +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cbigint)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdouble))) (TOK_WHERE (and (< (TOK_TABLE_OR_COL cbigint) (TOK_TABLE_OR_COL cdouble)) (> (TOK_TABLE_OR_COL cint) 0))) (TOK_LIMIT 7))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc + TableScan + alias: alltypesorc + Filter Operator + predicate: + expr: ((cbigint < cdouble) and (cint > 0)) + type: boolean + Vectorized execution: true + Select Operator + expressions: + expr: cbigint + type: bigint + expr: cdouble + type: double + outputColumnNames: _col0, _col1 + Vectorized execution: true + Limit + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: 7 + + +WARNING: Comparing a bigint and a double may result in a loss of precision. 
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1887561756 1839.0 +-1887561756 -10011.0 +-1887561756 -13877.0 +-1887561756 10361.0 +-1887561756 -8881.0 +-1887561756 -2281.0 +-1887561756 9531.0