diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index d213731..886e222 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -477,8 +477,8 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; - if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr, mode)) { - ve = getCustomUDFExpression(expr); + if (isCustomUDF(expr)) { + ve = getCustomUDFExpression(expr, mode); } else { // Add cast expression if needed. Child expressions of a udf may return different data types @@ -489,13 +489,20 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th exprDesc.getChildren(), exprDesc.getTypeInfo()); ve = getGenericUdfVectorExpression(expr.getGenericUDF(), childExpressions, mode, exprDesc.getTypeInfo()); + if (ve == null) { + /* + * Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor. 
+ */ + ve = getCustomUDFExpression(expr, mode); + } } } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), mode); } if (ve == null) { - throw new HiveException("Could not vectorize expression: "+exprDesc.getName()); + throw new HiveException( + "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()); } if (LOG.isDebugEnabled()) { LOG.debug("Input Expression = " + exprDesc.toString() @@ -758,64 +765,6 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException return genericUdf; } - - /* Return true if this is one of a small set of functions for which - * it is significantly easier to use the old code path in vectorized - * mode instead of implementing a new, optimized VectorExpression. - * - * Depending on performance requirements and frequency of use, these - * may be implemented in the future with an optimized VectorExpression. - */ - public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr, Mode mode) { - GenericUDF gudf = expr.getGenericUDF(); - if (gudf instanceof GenericUDFBridge) { - GenericUDFBridge bridge = (GenericUDFBridge) gudf; - Class udfClass = bridge.getUdfClass(); - if (udfClass.equals(UDFHex.class) - || udfClass.equals(UDFRegExpExtract.class) - || udfClass.equals(UDFRegExpReplace.class) - || udfClass.equals(UDFConv.class) - || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr)) - || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) - || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr)) - || udfClass.equals(UDFToString.class) && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } - } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) - - /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because - * 
of their complexity and generality. In the future, variations of these - * can be optimized to run faster for the vectorized code path. For example, - * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END - * is an example of a GenericUDFCase that has all constant arguments - * except for the first argument. This is probably a common case and a - * good candidate for a fast, special-purpose VectorExpression. Then - * the UDF Adaptor code path could be used as a catch-all for - * non-optimized general cases. - */ - || gudf instanceof GenericUDFCase - || gudf instanceof GenericUDFWhen) { - return true; - } else if (gudf instanceof GenericUDFToChar && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } else if (gudf instanceof GenericUDFToVarchar && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } else if (gudf instanceof GenericUDFBetween && (mode == Mode.PROJECTION)) { - // between has 4 args here, but can be vectorized like this - return true; - } - return false; - } - public static boolean isCastToIntFamily(Class udfClass) { return udfClass.equals(UDFToByte.class) || udfClass.equals(UDFToShort.class) @@ -1213,36 +1162,38 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, List castedChildren = evaluateCastOnConstants(childExpr); childExpr = castedChildren; - //First handle special cases + //First handle special cases. If one of the special case methods cannot handle it, + // it returns null. 
+ VectorExpression ve = null; if (udf instanceof GenericUDFBetween && mode == Mode.FILTER) { - return getBetweenFilterExpression(childExpr, mode, returnType); + ve = getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { - return getInExpression(childExpr, mode, returnType); + ve = getInExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFOPPositive) { - return getIdentityExpression(childExpr); + ve = getIdentityExpression(childExpr); } else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) { // Coalesce is a special case because it can take variable number of arguments. // Nvl is a specialization of the Coalesce. - return getCoalesceExpression(childExpr, returnType); + ve = getCoalesceExpression(childExpr, returnType); } else if (udf instanceof GenericUDFElt) { // Elt is a special case because it can take variable number of arguments. - return getEltExpression(childExpr, returnType); + ve = getEltExpression(childExpr, returnType); } else if (udf instanceof GenericUDFBridge) { - VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, + ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, returnType); - if (v != null) { - return v; - } } else if (udf instanceof GenericUDFToDecimal) { - return getCastToDecimal(childExpr, returnType); + ve = getCastToDecimal(childExpr, returnType); } else if (udf instanceof GenericUDFToChar) { - return getCastToChar(childExpr, returnType); + ve = getCastToChar(childExpr, returnType); } else if (udf instanceof GenericUDFToVarchar) { - return getCastToVarChar(childExpr, returnType); + ve = getCastToVarChar(childExpr, returnType); } else if (udf instanceof GenericUDFTimestamp) { - return getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + } + if (ve != null) { + return ve; } 
// Now do a general lookup Class udfClass = udf.getClass(); @@ -1252,13 +1203,9 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, isSubstituted = true; } - VectorExpression ve = getVectorExpressionForUdf((!isSubstituted ? udf : null), + ve = getVectorExpressionForUdf((!isSubstituted ? udf : null), udfClass, castedChildren, mode, returnType); - if (ve == null) { - throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); - } - return ve; } @@ -1623,16 +1570,20 @@ private VectorExpression getInExpression(List childExpr, Mode mode private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf, List childExpr, Mode mode, TypeInfo returnType) throws HiveException { Class cl = udf.getUdfClass(); + VectorExpression ve = null; if (isCastToIntFamily(cl)) { - return getCastToLongExpression(childExpr); + ve = getCastToLongExpression(childExpr); } else if (cl.equals(UDFToBoolean.class)) { - return getCastToBoolean(childExpr); + ve = getCastToBoolean(childExpr); } else if (isCastToFloatFamily(cl)) { - return getCastToDoubleExpression(cl, childExpr, returnType); + ve = getCastToDoubleExpression(cl, childExpr, returnType); } else if (cl.equals(UDFToString.class)) { - return getCastToString(childExpr, returnType); + ve = getCastToString(childExpr, returnType); } - return null; + if (ve == null && childExpr instanceof ExprNodeGenericFuncDesc) { + ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode); + } + return ve; } private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException { @@ -1762,10 +1713,10 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo returnType); } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringToDecimal.class, childExpr, Mode.PROJECTION, returnType); - } else if (isDatetimeFamily(inputType)) { + } else if (inputType.equals("timestamp")) { return 
createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType); } - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToString(List childExpr, TypeInfo returnType) @@ -1790,11 +1741,7 @@ private VectorExpression getCastToString(List childExpr, TypeInfo } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToString.class, childExpr, Mode.PROJECTION, returnType); } - /* The string type is deliberately omitted -- the planner removes string to string casts. - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. - */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToChar(List childExpr, TypeInfo returnType) @@ -1818,12 +1765,7 @@ private VectorExpression getCastToChar(List childExpr, TypeInfo re } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType); } - - /* - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. - */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToVarChar(List childExpr, TypeInfo returnType) @@ -1847,12 +1789,7 @@ private VectorExpression getCastToVarChar(List childExpr, TypeInfo } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType); } - - /* - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. 
- */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToDoubleExpression(Class udf, List childExpr, @@ -1875,8 +1812,6 @@ private VectorExpression getCastToDoubleExpression(Class udf, List childExpr) ocm.freeOutputColumn(lenExpr.getOutputColumn()); return lenToBoolExpr; } - // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op - return null; } @@ -1926,8 +1859,6 @@ private VectorExpression getCastToLongExpression(List childExpr) // integer and boolean types require no conversion, so use a no-op return getIdentityExpression(childExpr); } - // string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF. - return null; } @@ -2031,9 +1962,13 @@ private VectorExpression getBetweenFilterExpression(List childExpr /* * Return vector expression for a custom (i.e. not built-in) UDF. */ - private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr) + private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Mode mode) throws HiveException { + if (mode != Mode.PROJECTION) { + return null; + } + //GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF(); List childExprList = expr.getChildren(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 4a156a2..51e7a17 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1681,14 +1681,14 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressio if (desc.getChildren() != null) { if (isInExpression && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) { - // Don't restrict child expressions for projection. + // Don't restrict child expressions for projection. // Always use loose FILTER mode. 
if (!validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER)) { return false; } } else { for (ExprNodeDesc d : desc.getChildren()) { - // Don't restrict child expressions for projection. + // Don't restrict child expressions for projection. // Always use loose FILTER mode. if (!validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER)) { return false; @@ -1754,10 +1754,16 @@ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode return false; } } catch (Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to vectorize", e); + if (e instanceof HiveException) { + LOG.info(e.getMessage()); + } else { + if (LOG.isDebugEnabled()) { + // Show stack trace. + LOG.debug("Failed to vectorize", e); + } else { + LOG.info("Failed to vectorize: " + e.getMessage()); + } } - return false; } return true; @@ -2219,7 +2225,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, if (keySerializerClass != org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class) { return false; } - + TableDesc valueTableDesc = desc.getValueSerializeInfo(); Class valueDeserializerClass = valueTableDesc.getDeserializerClass(); if (valueDeserializerClass != org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class) { @@ -2278,7 +2284,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, } else { reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]); } - + vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap); vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos); vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes); @@ -2333,7 +2339,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, } } break; - + case REDUCESINK: { VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo(); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java 
ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index 9e0159c..aef46da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -135,6 +135,12 @@ public void setChildren(List children) { public String toString() { StringBuilder sb = new StringBuilder(); sb.append(genericUDF.getClass().getSimpleName()); + if (genericUDF instanceof GenericUDFBridge) { + GenericUDFBridge genericUDFBridge = (GenericUDFBridge) genericUDF; + sb.append(" ==> "); + sb.append(genericUDFBridge.getUdfName()); + sb.append(" "); + } sb.append("("); if (chidren != null) { for (int i = 0; i < chidren.size(); i++) { diff --git ql/src/test/queries/clientpositive/vector_between_columns.q ql/src/test/queries/clientpositive/vector_between_columns.q index 4c83d0a..ba38445 100644 --- ql/src/test/queries/clientpositive/vector_between_columns.q +++ ql/src/test/queries/clientpositive/vector_between_columns.q @@ -6,7 +6,10 @@ set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; @@ -21,9 +24,9 @@ create table TSINT stored as orc AS SELECT * FROM TSINT_txt; create table TINT stored as orc AS SELECT * FROM TINT_txt; --- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. 
+ + explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint; +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint; +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint; diff --git ql/src/test/results/clientpositive/tez/vector_between_columns.q.out ql/src/test/results/clientpositive/tez/vector_between_columns.q.out index d8f9c8b..d548364 100644 --- ql/src/test/results/clientpositive/tez/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/tez/vector_between_columns.q.out @@ -1,12 +1,18 @@ PREHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@TSINT_txt POSTHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' POSTHOOK: type: CREATETABLE @@ -65,15 +71,11 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. -explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. -explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain STAGE DEPENDENCIES: @@ -111,8 +113,8 @@ STAGE PLANS: predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -143,18 +145,18 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY PREHOOK: Input: default@tint PREHOOK: Input: default@tsint #### A masked pattern was here #### -POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY POSTHOOK: Input: default@tint POSTHOOK: Input: default@tsint #### A masked pattern was here #### -tint.rnum tsint.rnum -1 1 -2 2 -3 3 -4 4 +tint.rnum tsint.rnum tint.cint tsint.csint +1 1 -1 -1 +2 2 0 0 +3 3 1 1 +4 4 10 10 diff --git ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out index 54bad12..bcf1ab6 100644 --- ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out @@ -2156,6 +2156,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out index b7ddf73..de8ce7f 100644 --- ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out @@ -145,6 +145,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out index a4e8d64..5faa79b 100644 --- ql/src/test/results/clientpositive/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -1,12 +1,18 @@ PREHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@TSINT_txt POSTHOOK: query: -- SORT_QUERY_RESULTS - +-- +-- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween +-- because the mode = FILTER is not supported yet. +-- create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' POSTHOOK: type: CREATETABLE @@ -65,15 +71,11 @@ POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, t POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ] tint_txt.rnum tint_txt.cint Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. 
-explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY -POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions --- are not constants. We currently do not support the range expressions being columns. -explain -select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: explain +select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY Explain STAGE DEPENDENCIES: @@ -124,8 +126,8 @@ STAGE PLANS: predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -144,18 +146,18 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product -PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint PREHOOK: type: QUERY PREHOOK: Input: default@tint PREHOOK: Input: default@tsint #### A masked pattern was here #### -POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint POSTHOOK: type: QUERY POSTHOOK: Input: default@tint POSTHOOK: Input: default@tsint #### A masked pattern was here #### -tint.rnum tsint.rnum -1 1 -2 2 -3 3 -4 4 +tint.rnum tsint.rnum tint.cint tsint.csint +1 1 -1 -1 +2 2 0 0 +3 3 1 1 +4 4 10 10 diff --git ql/src/test/results/clientpositive/vector_decimal_udf.q.out ql/src/test/results/clientpositive/vector_decimal_udf.q.out index 9dea502..b99fd10 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf.q.out @@ -2085,6 +2085,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/vector_decimal_udf2.q.out index 805584a..4e24fa6 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf2.q.out @@ -139,6 +139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_udf1.q.out ql/src/test/results/clientpositive/vector_udf1.q.out index bb02ea7..232d78e 100644 --- ql/src/test/results/clientpositive/vector_udf1.q.out +++ ql/src/test/results/clientpositive/vector_udf1.q.out @@ -766,6 +766,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator