diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 9558d31..d213731 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -408,8 +408,31 @@ private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc VectorExpression expr = null; switch (mode) { case FILTER: - //Important: It will come here only if the column is being used as a boolean - expr = new SelectColumnIsTrue(columnNum); + // Evaluate the column as a boolean, converting if necessary. + TypeInfo typeInfo = exprDesc.getTypeInfo(); + if (typeInfo.getCategory() == Category.PRIMITIVE && + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) { + expr = new SelectColumnIsTrue(columnNum); + } else { + // Ok, we need to convert. + ArrayList exprAsList = new ArrayList(1); + exprAsList.add(exprDesc); + + // First try our cast method that will handle a few special cases. + VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList); + if (castToBooleanExpr == null) { + + // Ok, try the UDF. + castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, + Mode.PROJECTION, null); + if (castToBooleanExpr == null) { + throw new HiveException("Cannot vectorize converting expression " + + exprDesc.getExprString() + " to boolean"); + } + } + expr = new SelectColumnIsTrue(castToBooleanExpr.getOutputColumn()); + expr.setChildExpressions(new VectorExpression[] {castToBooleanExpr}); + } break; case PROJECTION: expr = new IdentityExpression(columnNum, exprDesc.getTypeString()); diff --git ql/src/test/queries/clientpositive/vector_empty_where.q ql/src/test/queries/clientpositive/vector_empty_where.q new file mode 100644 index 0000000..0543a65 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_empty_where.q @@ -0,0 +1,23 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +-- HIVE- +explain +select count (distinct cint) from alltypesorc where cstring1; + +select count (distinct cint) from alltypesorc where cstring1; + +explain +select count (distinct cint) from alltypesorc where cint; + +select count (distinct cint) from alltypesorc where cint; + +explain +select count (distinct cint) from alltypesorc where cfloat; + +select count (distinct cint) from alltypesorc where cfloat; + +explain +select count (distinct cint) from alltypesorc where ctimestamp1; + +select count (distinct cint) from alltypesorc where ctimestamp1; diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out new file mode 100644 index 0000000..8f694da --- /dev/null +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -0,0 +1,254 @@ +PREHOOK: query: -- HIVE- +explain +select count (distinct cint) from alltypesorc where cstring1 +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE- +explain +select count (distinct cint) from alltypesorc where cstring1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cstring1 (type: string) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT cint) + keys: cint (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col0:0._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cstring1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cstring1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6041 +PREHOOK: query: explain +select count (distinct cint) from alltypesorc where cint +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count (distinct cint) from alltypesorc where cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT cint) + keys: cint (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col0:0._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6082 +PREHOOK: query: explain +select count (distinct cint) from alltypesorc where cfloat +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count (distinct cint) from alltypesorc where cfloat +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cfloat (type: float) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT cint) + keys: cint (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col0:0._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3022 +PREHOOK: query: explain +select count (distinct cint) from alltypesorc where ctimestamp1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count (distinct cint) from alltypesorc where ctimestamp1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctimestamp1 (type: timestamp) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: cint + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT cint) + keys: cint (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col0:0._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3022