diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
index c13510e..07a6e9d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
@@ -131,7 +131,7 @@ public VectorSMBMapJoinOperator(CompilationOpContext ctx, OperatorDesc conf,
     List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
     keyExpressions = vContext.getVectorExpressions(keyDesc);
-    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
+    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyExpressions);
 
     Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
     bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index b7feb1c..57f7c01 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1806,6 +1806,25 @@ private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeIn
     return vectorExpression;
   }
 
+  public void wrapWithDecimal64ToDecimalConversions(VectorExpression[] vecExprs)
+      throws HiveException {
+    if (vecExprs == null) {
+      return;
+    }
+    final int size = vecExprs.length;
+    for (int i = 0; i < size; i++) {
+      VectorExpression vecExpr = vecExprs[i];
+      if (vecExpr.getOutputTypeInfo() instanceof DecimalTypeInfo) {
+        DataTypePhysicalVariation outputDataTypePhysicalVariation =
+            vecExpr.getOutputDataTypePhysicalVariation();
+        if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
+          vecExprs[i] =
+              wrapWithDecimal64ToDecimalConversion(vecExpr);
+        }
+      }
+    }
+  }
+
   public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression)
       throws HiveException {
 
@@ -2903,7 +2922,11 @@ private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo
     } else if (isTimestampFamily(inputType)) {
       return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
     } else if (isStringFamily(inputType)) {
-      return createVectorExpression(CastStringGroupToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+
+      // STRING and VARCHAR types require no conversion, so use a no-op.
+      // Also, CHAR is stored in BytesColumnVector with trimmed blank padding, so it also
+      // requires no conversion.
+      return getIdentityExpression(childExpr);
     }
     return null;
   }
@@ -3123,8 +3146,27 @@ private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
     List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>();
     boolean wereCastUdfs = false;
+    Category commonTypeCategory = commonType.getCategory();
     for (ExprNodeDesc desc: childExpr.subList(1, 4)) {
-      if (commonType.equals(desc.getTypeInfo())) {
+      TypeInfo childTypeInfo = desc.getTypeInfo();
+      Category childCategory = childTypeInfo.getCategory();
+
+      if (childCategory != commonTypeCategory) {
+        return null;
+      }
+      final boolean isNeedsCast;
+      if (commonTypeCategory == Category.PRIMITIVE) {
+
+        // Do not do strict TypeInfo comparisons for DECIMAL -- just compare the category.
+        // Otherwise, we generate unnecessary casts.
+        isNeedsCast =
+            ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() !=
+            ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory();
+      } else {
+        isNeedsCast = !commonType.equals(desc.getTypeInfo());
+      }
+
+      if (!isNeedsCast) {
         castChildren.add(desc);
       } else {
         GenericUDF castUdf = getGenericUDFForCast(commonType);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig
deleted file mode 100644
index 20cc894..0000000
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig
+++ /dev/null
@@ -1,3771 +0,0 @@
[... 3,771 deleted lines omitted: VectorizationContext.java.orig is a stray merge-backup file, a verbatim copy of the pre-patch VectorizationContext.java, and this patch simply deletes it ...]
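
For context on the `getBetweenExpression` change above, here is a minimal, standalone sketch (not part of the patch) of the decision the new `isNeedsCast` flag encodes: for primitive types, only the `PrimitiveCategory` is compared, so two DECIMAL types that differ only in precision/scale no longer force an extra cast UDF. The `BetweenCastSketch` class and its `main()` driver are illustrative assumptions; the type APIs (`TypeInfoFactory`, `PrimitiveTypeInfo`, `Category`) are the ones used in the hunk itself.

```java
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class BetweenCastSketch {

  // Mirrors the patched logic: for primitive types, a cast is needed only
  // when the PrimitiveCategory differs; otherwise fall back to full
  // TypeInfo equality (as before the patch).
  static boolean isNeedsCast(TypeInfo commonType, TypeInfo childType) {
    if (commonType.getCategory() == Category.PRIMITIVE) {
      return ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() !=
          ((PrimitiveTypeInfo) childType).getPrimitiveCategory();
    }
    return !commonType.equals(childType);
  }

  public static void main(String[] args) {
    TypeInfo dec12_2 = TypeInfoFactory.getDecimalTypeInfo(12, 2);
    TypeInfo dec10_0 = TypeInfoFactory.getDecimalTypeInfo(10, 0);

    // equals() is false because precision/scale differ, but both are
    // DECIMAL, so no cast UDF needs to be generated.
    System.out.println(dec12_2.equals(dec10_0));        // false
    System.out.println(isNeedsCast(dec12_2, dec10_0));  // false

    // int vs. bigint: different PrimitiveCategory, so a cast is required.
    System.out.println(isNeedsCast(TypeInfoFactory.longTypeInfo,
        TypeInfoFactory.intTypeInfo));                  // true
  }
}
```

The early `return null` when the categories themselves disagree keeps the expression on the row-mode path rather than vectorizing a comparison across incompatible categories.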
org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; -import org.apache.hadoop.hive.ql.udf.*; -import org.apache.hadoop.hive.ql.udf.generic.*; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.io.DateWritableV2; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hive.common.util.AnnotationUtils; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - -/** - * Context class for vectorization execution. - * Main role is to map column names to column indices and serves as a - * factory class for building vectorized expressions out of descriptors. 
- * - */ -public class VectorizationContext { - - private static final Logger LOG = LoggerFactory.getLogger( - VectorizationContext.class.getName()); - - private final String contextName; - private final int level; - - VectorExpressionDescriptor vMap; - - private final List initialColumnNames; - private List initialTypeInfos; - private List initialDataTypePhysicalVariations; - - private List projectedColumns; - private List projectionColumnNames; - private Map projectionColumnMap; - - //columnName to column position map - // private final Map columnMap; - private int firstOutputColumnIndex; - - public enum HiveVectorAdaptorUsageMode { - NONE, - CHOSEN, - ALL; - - public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { - String string = HiveConf.getVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_USAGE_MODE); - return valueOf(string.toUpperCase()); - } - } - - private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; - private boolean testVectorAdaptorOverride; - - public enum HiveVectorIfStmtMode { - ADAPTOR, - GOOD, - BETTER; - - public static HiveVectorIfStmtMode getHiveConfValue(HiveConf hiveConf) { - String string = HiveConf.getVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE); - return valueOf(string.toUpperCase()); - } - } - - private HiveVectorIfStmtMode hiveVectorIfStmtMode; - - //when set to true use the overflow checked vector expressions - private boolean useCheckedVectorExpressions; - - private boolean reuseScratchColumns = - HiveConf.ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS.defaultBoolVal; - - private boolean adaptorSuppressEvaluateExceptions; - - private void setHiveConfVars(HiveConf hiveConf) { - hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf); - testVectorAdaptorOverride = - HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE); - hiveVectorIfStmtMode = HiveVectorIfStmtMode.getHiveConfValue(hiveConf); - this.reuseScratchColumns = - HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_VECTORIZATION_TESTING_REUSE_SCRATCH_COLUMNS); - this.ocm.setReuseColumns(reuseScratchColumns); - useCheckedVectorExpressions = - HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_CHECKED_EXPRESSIONS); - adaptorSuppressEvaluateExceptions = - HiveConf.getBoolVar( - hiveConf, HiveConf.ConfVars.HIVE_VECTORIZED_ADAPTOR_SUPPRESS_EVALUATE_EXCEPTIONS); - } - - private void copyHiveConfVars(VectorizationContext vContextEnvironment) { - hiveVectorAdaptorUsageMode = vContextEnvironment.hiveVectorAdaptorUsageMode; - testVectorAdaptorOverride = vContextEnvironment.testVectorAdaptorOverride; - hiveVectorIfStmtMode = vContextEnvironment.hiveVectorIfStmtMode; - this.reuseScratchColumns = vContextEnvironment.reuseScratchColumns; - useCheckedVectorExpressions = vContextEnvironment.useCheckedVectorExpressions; - adaptorSuppressEvaluateExceptions = vContextEnvironment.adaptorSuppressEvaluateExceptions; - this.ocm.setReuseColumns(reuseScratchColumns); - } - - // Convenient constructor for initial batch creation takes - // a list of columns names and maps them to 0..n-1 indices. 
- public VectorizationContext( - String contextName, - List initialColumnNames, - List initialTypeInfos, - List initialDataTypePhysicalVariations, - HiveConf hiveConf) { - this.contextName = contextName; - level = 0; - this.initialColumnNames = initialColumnNames; - this.initialTypeInfos = initialTypeInfos; - this.initialDataTypePhysicalVariations = initialDataTypePhysicalVariations; - this.projectionColumnNames = initialColumnNames; - - projectedColumns = new ArrayList(); - projectionColumnMap = new HashMap(); - for (int i = 0; i < this.projectionColumnNames.size(); i++) { - projectedColumns.add(i); - projectionColumnMap.put(projectionColumnNames.get(i), i); - } - - int firstOutputColumnIndex = projectedColumns.size(); - this.ocm = new OutputColumnManager(firstOutputColumnIndex); - this.firstOutputColumnIndex = firstOutputColumnIndex; - vMap = new VectorExpressionDescriptor(); - - if (hiveConf != null) { - setHiveConfVars(hiveConf); - } - } - - // Convenient constructor for initial batch creation takes - // a list of columns names and maps them to 0..n-1 indices. - public VectorizationContext(String contextName, List initialColumnNames, - HiveConf hiveConf) { - this.contextName = contextName; - level = 0; - this.initialColumnNames = initialColumnNames; - this.projectionColumnNames = initialColumnNames; - - projectedColumns = new ArrayList(); - projectionColumnMap = new HashMap(); - for (int i = 0; i < this.projectionColumnNames.size(); i++) { - projectedColumns.add(i); - projectionColumnMap.put(projectionColumnNames.get(i), i); - } - - int firstOutputColumnIndex = projectedColumns.size(); - this.ocm = new OutputColumnManager(firstOutputColumnIndex); - this.firstOutputColumnIndex = firstOutputColumnIndex; - vMap = new VectorExpressionDescriptor(); - - if (hiveConf != null) { - setHiveConfVars(hiveConf); - } - } - - public VectorizationContext(String contextName, List initialColumnNames, - VectorizationContext vContextEnvironment) { - this(contextName, initialColumnNames, (HiveConf) null); - copyHiveConfVars(vContextEnvironment); - } - - @VisibleForTesting - public VectorizationContext(String contextName, List initialColumnNames) { - this(contextName, initialColumnNames, (HiveConf) null); - } - - // Constructor to with the individual addInitialColumn method - // followed by a call to finishedAddingInitialColumns. - public VectorizationContext(String contextName, HiveConf hiveConf) { - this.contextName = contextName; - level = 0; - initialColumnNames = new ArrayList(); - projectedColumns = new ArrayList(); - projectionColumnNames = new ArrayList(); - projectionColumnMap = new HashMap(); - this.ocm = new OutputColumnManager(0); - this.firstOutputColumnIndex = 0; - vMap = new VectorExpressionDescriptor(); - - if (hiveConf != null) { - setHiveConfVars(hiveConf); - } - - } - - @VisibleForTesting - public VectorizationContext(String contextName) { - this(contextName, (HiveConf) null); - } - - // Constructor useful making a projection vectorization context. E.g. VectorSelectOperator. - // Use with resetProjectionColumns and addProjectionColumn. - // Keeps existing output column map, etc. 
- public VectorizationContext(String contextName, VectorizationContext vContext) { - this.contextName = contextName; - level = vContext.level + 1; - this.initialColumnNames = vContext.initialColumnNames; - this.initialTypeInfos = vContext.initialTypeInfos; - this.initialDataTypePhysicalVariations = vContext.initialDataTypePhysicalVariations; - this.projectedColumns = new ArrayList(); - this.projectionColumnNames = new ArrayList(); - this.projectionColumnMap = new HashMap(); - - this.ocm = vContext.ocm; - this.firstOutputColumnIndex = vContext.firstOutputColumnIndex; - vMap = new VectorExpressionDescriptor(); - - copyHiveConfVars(vContext); - } - - // Add an initial column to a vectorization context when - // a vectorized row batch is being created. - public void addInitialColumn(String columnName) { - initialColumnNames.add(columnName); - int index = projectedColumns.size(); - projectedColumns.add(index); - projectionColumnNames.add(columnName); - projectionColumnMap.put(columnName, index); - } - - // Finishes the vectorization context after all the initial - // columns have been added. - @VisibleForTesting - public void finishedAddingInitialColumns() { - int firstOutputColumnIndex = projectedColumns.size(); - this.ocm = new OutputColumnManager(firstOutputColumnIndex); - this.ocm.setReuseColumns(this.reuseScratchColumns); - this.firstOutputColumnIndex = firstOutputColumnIndex; - } - - // Empties the projection columns. - public void resetProjectionColumns() { - projectedColumns = new ArrayList(); - projectionColumnNames = new ArrayList(); - projectionColumnMap = new HashMap(); - } - - // Add a projection column to a projection vectorization context. - public void addProjectionColumn(String columnName, int vectorBatchColIndex) { - if (vectorBatchColIndex < 0) { - throw new RuntimeException("Negative projected column number"); - } - projectedColumns.add(vectorBatchColIndex); - projectionColumnNames.add(columnName); - projectionColumnMap.put(columnName, vectorBatchColIndex); - } - - public void setInitialTypeInfos(List initialTypeInfos) { - this.initialTypeInfos = initialTypeInfos; - final int size = initialTypeInfos.size(); - initialDataTypePhysicalVariations = new ArrayList(size); - for (int i = 0; i < size; i++) { - initialDataTypePhysicalVariations.add(DataTypePhysicalVariation.NONE); - } - } - - public void setInitialDataTypePhysicalVariations( - List initialDataTypePhysicalVariations) { - this.initialDataTypePhysicalVariations = initialDataTypePhysicalVariations; - } - - public List getInitialColumnNames() { - return initialColumnNames; - } - - public List getProjectedColumns() { - return projectedColumns; - } - - public List getProjectionColumnNames() { - return projectionColumnNames; - } - - public Map getProjectionColumnMap() { - return projectionColumnMap; - } - - public TypeInfo[] getInitialTypeInfos() { - return initialTypeInfos.toArray(new TypeInfo[0]); - } - - public TypeInfo getTypeInfo(int columnNum) throws HiveException { - if (initialTypeInfos == null) { - throw new HiveException("initialTypeInfos array is null in contextName " + contextName); - } - final int initialSize = initialTypeInfos.size(); - if (columnNum < initialSize) { - return initialTypeInfos.get(columnNum); - } else { - String typeName = ocm.getScratchTypeName(columnNum); - - // Replace unparsable synonyms. - typeName = VectorizationContext.mapTypeNameSynonyms(typeName); - - // Make CHAR and VARCHAR type info parsable. 
- if (typeName.equals("char")) { - typeName = "char(" + HiveChar.MAX_CHAR_LENGTH + ")"; - } else if (typeName.equals("varchar")) { - typeName = "varchar(" + HiveVarchar.MAX_VARCHAR_LENGTH + ")"; - } - - TypeInfo typeInfo = - TypeInfoUtils.getTypeInfoFromTypeString(typeName); - return typeInfo; - } - } - - public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) throws HiveException { - if (initialDataTypePhysicalVariations == null) { - return null; - } - if (columnNum < initialDataTypePhysicalVariations.size()) { - return initialDataTypePhysicalVariations.get(columnNum); - } - return ocm.getDataTypePhysicalVariation(columnNum); - } - - public TypeInfo[] getAllTypeInfos() throws HiveException { - final int size = initialTypeInfos.size() + ocm.outputColCount; - - TypeInfo[] result = new TypeInfo[size]; - for (int i = 0; i < size; i++) { - result[i] = getTypeInfo(i); - } - return result; - } - - public static final Pattern decimalTypePattern = Pattern.compile("decimal.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern charTypePattern = Pattern.compile("char.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern varcharTypePattern = Pattern.compile("varchar.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern structTypePattern = Pattern.compile("struct.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern listTypePattern = Pattern.compile("array.*", - Pattern.CASE_INSENSITIVE); - - public static final Pattern mapTypePattern = Pattern.compile("map.*", - Pattern.CASE_INSENSITIVE); - - //Map column number to type (this is always non-null for a useful vec context) - private OutputColumnManager ocm; - - // Set of UDF classes for type casting data types in row-mode. - private static Set> castExpressionUdfs = new HashSet>(); - static { - castExpressionUdfs.add(GenericUDFToString.class); - castExpressionUdfs.add(GenericUDFToDecimal.class); - castExpressionUdfs.add(GenericUDFToBinary.class); - castExpressionUdfs.add(GenericUDFToDate.class); - castExpressionUdfs.add(GenericUDFToUnixTimeStamp.class); - castExpressionUdfs.add(GenericUDFToUtcTimestamp.class); - castExpressionUdfs.add(GenericUDFToChar.class); - castExpressionUdfs.add(GenericUDFToVarchar.class); - castExpressionUdfs.add(GenericUDFTimestamp.class); - castExpressionUdfs.add(GenericUDFToIntervalYearMonth.class); - castExpressionUdfs.add(GenericUDFToIntervalDayTime.class); - castExpressionUdfs.add(UDFToByte.class); - castExpressionUdfs.add(UDFToBoolean.class); - castExpressionUdfs.add(UDFToDouble.class); - castExpressionUdfs.add(UDFToFloat.class); - castExpressionUdfs.add(UDFToInteger.class); - castExpressionUdfs.add(UDFToLong.class); - castExpressionUdfs.add(UDFToShort.class); - } - - // Set of GenericUDFs which require need implicit type casting of decimal parameters. - // Vectorization for mathmatical functions currently depends on decimal params automatically - // being converted to the return type (see getImplicitCastExpression()), which is not correct - // in the general case. This set restricts automatic type conversion to just these functions. 
- private static Set> udfsNeedingImplicitDecimalCast = new HashSet>(); - static { - udfsNeedingImplicitDecimalCast.add(GenericUDFOPPlus.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPMinus.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPMultiply.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPDivide.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPMod.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFRound.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFBRound.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFFloor.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFCbrt.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFCeil.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFAbs.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFPosMod.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFPower.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFFactorial.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPPositive.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPNegative.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFCoalesce.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFElt.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFGreatest.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFLeast.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFIn.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqual.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualNS.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPNotEqual.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPLessThan.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualOrLessThan.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPGreaterThan.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualOrGreaterThan.class); - udfsNeedingImplicitDecimalCast.add(GenericUDFBetween.class); - udfsNeedingImplicitDecimalCast.add(UDFSqrt.class); - udfsNeedingImplicitDecimalCast.add(UDFRand.class); - udfsNeedingImplicitDecimalCast.add(UDFLn.class); - udfsNeedingImplicitDecimalCast.add(UDFLog2.class); - udfsNeedingImplicitDecimalCast.add(UDFSin.class); - udfsNeedingImplicitDecimalCast.add(UDFAsin.class); - udfsNeedingImplicitDecimalCast.add(UDFCos.class); - udfsNeedingImplicitDecimalCast.add(UDFAcos.class); - udfsNeedingImplicitDecimalCast.add(UDFLog10.class); - udfsNeedingImplicitDecimalCast.add(UDFLog.class); - udfsNeedingImplicitDecimalCast.add(UDFExp.class); - udfsNeedingImplicitDecimalCast.add(UDFDegrees.class); - udfsNeedingImplicitDecimalCast.add(UDFRadians.class); - udfsNeedingImplicitDecimalCast.add(UDFAtan.class); - udfsNeedingImplicitDecimalCast.add(UDFTan.class); - udfsNeedingImplicitDecimalCast.add(UDFOPLongDivide.class); - } - - protected boolean needsImplicitCastForDecimal(GenericUDF udf) { - Class udfClass = udf.getClass(); - if (udf instanceof GenericUDFBridge) { - udfClass = ((GenericUDFBridge) udf).getUdfClass(); - } - return udfsNeedingImplicitDecimalCast.contains(udfClass); - } - - public int getInputColumnIndex(String name) throws HiveException { - if (name == null) { - throw new HiveException("Null column name"); - } - if (!projectionColumnMap.containsKey(name)) { - throw new HiveException(String.format("The column %s is not in the vectorization context column map %s.", - name, projectionColumnMap.toString())); - } - final int projectedColumnNum = projectionColumnMap.get(name); - if (projectedColumnNum < 0) { - throw new HiveException("Negative projected column 
number"); - } - return projectedColumnNum; - } - - protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) throws HiveException { - // Call the regular method since it does error checking. - return getInputColumnIndex(colExpr.getColumn()); - } - - private static class OutputColumnManager { - private final int initialOutputCol; - private int outputColCount = 0; - private boolean reuseScratchColumns = true; - - protected OutputColumnManager(int initialOutputCol) { - this.initialOutputCol = initialOutputCol; - } - - //The complete list of output columns. These should be added to the - //Vectorized row batch for processing. The index in the row batch is - //equal to the index in this array plus initialOutputCol. - //Start with size 100 and double when needed. - private String[] scratchVectorTypeNames = new String[100]; - private DataTypePhysicalVariation[] scratchDataTypePhysicalVariations = - new DataTypePhysicalVariation[100]; - - private final Set usedOutputColumns = new HashSet(); - - int allocateOutputColumn(TypeInfo typeInfo) throws HiveException { - return allocateOutputColumn(typeInfo, DataTypePhysicalVariation.NONE); - } - - int allocateOutputColumn(TypeInfo typeInfo, - DataTypePhysicalVariation dataTypePhysicalVariation) throws HiveException { - - if (initialOutputCol < 0) { - // This is a test calling. - return 0; - } - - // CONCERN: We currently differentiate DECIMAL columns by their precision and scale..., - // which could lead to a lot of extra unnecessary scratch columns. - String vectorTypeName = getScratchName(typeInfo); - int relativeCol = allocateOutputColumnInternal(vectorTypeName, dataTypePhysicalVariation); - return initialOutputCol + relativeCol; - } - - private int allocateOutputColumnInternal(String columnType, DataTypePhysicalVariation dataTypePhysicalVariation) { - for (int i = 0; i < outputColCount; i++) { - - // Re-use an existing, available column of the same required type. 
- if (usedOutputColumns.contains(i) || - !(scratchVectorTypeNames[i].equalsIgnoreCase(columnType) && - scratchDataTypePhysicalVariations[i] == dataTypePhysicalVariation)) { - continue; - } - //Use i - usedOutputColumns.add(i); - return i; - } - //Out of allocated columns - if (outputColCount < scratchVectorTypeNames.length) { - int newIndex = outputColCount; - scratchVectorTypeNames[outputColCount] = columnType; - scratchDataTypePhysicalVariations[outputColCount++] = dataTypePhysicalVariation; - usedOutputColumns.add(newIndex); - return newIndex; - } else { - //Expand the array - scratchVectorTypeNames = Arrays.copyOf(scratchVectorTypeNames, 2*outputColCount); - scratchDataTypePhysicalVariations = Arrays.copyOf(scratchDataTypePhysicalVariations, 2*outputColCount); - int newIndex = outputColCount; - scratchVectorTypeNames[outputColCount] = columnType; - scratchDataTypePhysicalVariations[outputColCount++] = dataTypePhysicalVariation; - usedOutputColumns.add(newIndex); - return newIndex; - } - } - - void freeOutputColumn(int index) { - if (initialOutputCol < 0 || reuseScratchColumns == false) { - // This is a test - return; - } - int colIndex = index-initialOutputCol; - if (colIndex >= 0) { - usedOutputColumns.remove(index-initialOutputCol); - } - } - - public int[] currentScratchColumns() { - TreeSet treeSet = new TreeSet(); - for (Integer col : usedOutputColumns) { - treeSet.add(initialOutputCol + col); - } - return ArrayUtils.toPrimitive(treeSet.toArray(new Integer[0])); - } - - public String getScratchTypeName(int columnNum) { - return scratchVectorTypeNames[columnNum - initialOutputCol]; - } - - public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) { - if (scratchDataTypePhysicalVariations == null) { - return null; - } - return scratchDataTypePhysicalVariations[columnNum - initialOutputCol]; - } - - // Allow debugging by disabling column reuse (input cols are never reused by design, only - // scratch cols are) - public void setReuseColumns(boolean reuseColumns) { - this.reuseScratchColumns = reuseColumns; - } - } - - public int allocateScratchColumn(TypeInfo typeInfo) throws HiveException { - return ocm.allocateOutputColumn(typeInfo); - } - - public int[] currentScratchColumns() { - return ocm.currentScratchColumns(); - } - - private VectorExpression getFilterOnBooleanColumnExpression(ExprNodeColumnDesc exprDesc, - int columnNum) throws HiveException { - VectorExpression expr = null; - - // Evaluate the column as a boolean, converting if necessary. - TypeInfo typeInfo = exprDesc.getTypeInfo(); - if (typeInfo.getCategory() == Category.PRIMITIVE && - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) { - expr = new SelectColumnIsTrue(columnNum); - - expr.setInputTypeInfos(typeInfo); - expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); - - } else { - // Ok, we need to convert. - ArrayList exprAsList = new ArrayList(1); - exprAsList.add(exprDesc); - - // First try our cast method that will handle a few special cases. - VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList); - if (castToBooleanExpr == null) { - - // Ok, try the UDF. 
- castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, - VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo); - if (castToBooleanExpr == null) { - throw new HiveException("Cannot vectorize converting expression " + - exprDesc.getExprString() + " to boolean"); - } - } - - final int outputColumnNum = castToBooleanExpr.getOutputColumnNum(); - - expr = new SelectColumnIsTrue(outputColumnNum); - - expr.setChildExpressions(new VectorExpression[] {castToBooleanExpr}); - - expr.setInputTypeInfos(castToBooleanExpr.getOutputTypeInfo()); - expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); - } - return expr; - } - - private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, - VectorExpressionDescriptor.Mode mode) throws HiveException { - int columnNum = getInputColumnIndex(exprDesc.getColumn()); - VectorExpression expr = null; - switch (mode) { - case FILTER: - expr = getFilterOnBooleanColumnExpression(exprDesc, columnNum); - break; - case PROJECTION: - { - expr = new IdentityExpression(columnNum); - - TypeInfo identityTypeInfo = exprDesc.getTypeInfo(); - DataTypePhysicalVariation identityDataTypePhysicalVariation = - getDataTypePhysicalVariation(columnNum); - - expr.setInputTypeInfos(identityTypeInfo); - expr.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation); - - expr.setOutputTypeInfo(identityTypeInfo); - expr.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation); - } - break; - default: - throw new RuntimeException("Unexpected mode " + mode); - } - return expr; - } - - public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List exprNodes) - throws HiveException { - VectorExpression[] vecExprs = - getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); - final int size = vecExprs.length; - for (int i = 0; i < size; i++) { - VectorExpression vecExpr = vecExprs[i]; - if (vecExpr.getOutputColumnVectorType() == ColumnVector.Type.DECIMAL_64) { - vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr); - } - } - return vecExprs; - } - - public VectorExpression[] getVectorExpressions(List exprNodes) throws HiveException { - return getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); - } - - public VectorExpression[] getVectorExpressions(List exprNodes, VectorExpressionDescriptor.Mode mode) - throws HiveException { - - int i = 0; - if (null == exprNodes) { - return new VectorExpression[0]; - } - VectorExpression[] ret = new VectorExpression[exprNodes.size()]; - for (ExprNodeDesc e : exprNodes) { - ret[i++] = getVectorExpression(e, mode); - } - return ret; - } - - public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveException { - return getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); - } - - /** - * Returns a vector expression for a given expression - * description. - * @param exprDesc, Expression description - * @param mode - * @return {@link VectorExpression} - * @throws HiveException - */ - public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException { - VectorExpression ve = null; - if (exprDesc instanceof ExprNodeColumnDesc) { - ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); - } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; - // push not through between... 
- if ("not".equals(expr.getFuncText())) { - if (expr.getChildren() != null && expr.getChildren().size() == 1) { - ExprNodeDesc child = expr.getChildren().get(0); - if (child instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc childExpr = (ExprNodeGenericFuncDesc) child; - if ("between".equals(childExpr.getFuncText())) { - ExprNodeConstantDesc flag = (ExprNodeConstantDesc) childExpr.getChildren().get(0); - List newChildren = new ArrayList<>(); - if (Boolean.TRUE.equals(flag.getValue())) { - newChildren.add(new ExprNodeConstantDesc(Boolean.FALSE)); - } else { - newChildren.add(new ExprNodeConstantDesc(Boolean.TRUE)); - } - newChildren - .addAll(childExpr.getChildren().subList(1, childExpr.getChildren().size())); - expr.setTypeInfo(childExpr.getTypeInfo()); - expr.setGenericUDF(childExpr.getGenericUDF()); - expr.setChildren(newChildren); - } - } - } - } - // Add cast expression if needed. Child expressions of a udf may return different data types - // and that would require converting their data types to evaluate the udf. - // For example decimal column added to an integer column would require integer column to be - // cast to decimal. - // Note: this is a no-op for custom UDFs - List childExpressions = getChildExpressionsWithImplicitCast(expr.getGenericUDF(), - exprDesc.getChildren(), exprDesc.getTypeInfo()); - - // Are we forcing the usage of VectorUDFAdaptor for test purposes? - if (!testVectorAdaptorOverride) { - ve = getGenericUdfVectorExpression(expr.getGenericUDF(), - childExpressions, mode, exprDesc.getTypeInfo()); - } - if (ve == null) { - // Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor - // when configured. - // - // NOTE: We assume if hiveVectorAdaptorUsageMode has not been set it because we are - // executing a test that didn't create a HiveConf, etc. No usage of VectorUDFAdaptor in - // that case. - if (hiveVectorAdaptorUsageMode != null) { - switch (hiveVectorAdaptorUsageMode) { - case NONE: - // No VectorUDFAdaptor usage. 
- throw new HiveException( - "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() - + " because hive.vectorized.adaptor.usage.mode=none"); - case CHOSEN: - if (isNonVectorizedPathUDF(expr, mode)) { - ve = getCustomUDFExpression(expr, mode); - } else { - throw new HiveException( - "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() - + " because hive.vectorized.adaptor.usage.mode=chosen" - + " and the UDF wasn't one of the chosen ones"); - } - break; - case ALL: - if (LOG.isDebugEnabled()) { - LOG.debug("We will try to use the VectorUDFAdaptor for " + exprDesc.toString() - + " because hive.vectorized.adaptor.usage.mode=all"); - } - ve = getCustomUDFExpression(expr, mode); - break; - default: - throw new RuntimeException("Unknown hive vector adaptor usage mode " + - hiveVectorAdaptorUsageMode.name()); - } - if (ve == null) { - throw new HiveException( - "Unable vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString() - + " even for the VectorUDFAdaptor"); - } - } - } - } else if (exprDesc instanceof ExprNodeConstantDesc) { - ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), - mode); - } else if (exprDesc instanceof ExprNodeDynamicValueDesc) { - ve = getDynamicValueVectorExpression((ExprNodeDynamicValueDesc) exprDesc, mode); - } else if (exprDesc instanceof ExprNodeFieldDesc) { - // Get the GenericUDFStructField to process the field of Struct type - ve = getGenericUDFStructField((ExprNodeFieldDesc)exprDesc, - mode, exprDesc.getTypeInfo()); - } - if (ve == null) { - throw new HiveException( - "Could not vectorize expression (mode = " + mode.name() + "): " + exprDesc.toString()); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Input Expression = " + exprDesc.toString() - + ", Vectorized Expression = " + ve.toString()); - } - - return ve; - } - - private VectorExpression getGenericUDFStructField(ExprNodeFieldDesc exprNodeFieldDesc, - VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - // set the arguments for GenericUDFStructField - List children = new ArrayList<>(2); - children.add(exprNodeFieldDesc.getDesc()); - children.add(new ExprNodeConstantDesc(getStructFieldIndex(exprNodeFieldDesc))); - - return getVectorExpressionForUdf(null, GenericUDFStructField.class, children, mode, returnType); - } - - /** - * The field of Struct is stored in StructColumnVector.fields[index]. - * Check the StructTypeInfo.getAllStructFieldNames() and compare to the field name, get the index. - */ - private int getStructFieldIndex(ExprNodeFieldDesc exprNodeFieldDesc) throws HiveException { - ExprNodeDesc structNodeDesc = exprNodeFieldDesc.getDesc(); - String fieldName = exprNodeFieldDesc.getFieldName(); - StructTypeInfo structTypeInfo = (StructTypeInfo) structNodeDesc.getTypeInfo(); - int index = 0; - boolean isFieldExist = false; - for (String fn : structTypeInfo.getAllStructFieldNames()) { - if (fieldName.equals(fn)) { - isFieldExist = true; - break; - } - index++; - } - if (isFieldExist) { - return index; - } else { - throw new HiveException("Could not vectorize expression:" + exprNodeFieldDesc.toString() - + ", the field " + fieldName + " doesn't exist."); - } - } - - /** - * Given a udf and its children, return the common type to which the children's type should be - * cast. 
- */ - private TypeInfo getCommonTypeForChildExpressions(GenericUDF genericUdf, - List children, TypeInfo returnType) throws HiveException { - TypeInfo commonType; - if (genericUdf instanceof GenericUDFBaseCompare) { - - // Apply comparison rules - TypeInfo tLeft = children.get(0).getTypeInfo(); - TypeInfo tRight = children.get(1).getTypeInfo(); - commonType = FunctionRegistry.getCommonClassForComparison(tLeft, tRight); - if (commonType == null) { - commonType = returnType; - } - } else if (genericUdf instanceof GenericUDFIn) { - TypeInfo colTi = children.get(0).getTypeInfo(); - if (colTi.getCategory() != Category.PRIMITIVE) { - return colTi; // Handled later, only struct will be supported. - } - TypeInfo opTi = GenericUDFUtils.deriveInType(children); - if (opTi == null || opTi.getCategory() != Category.PRIMITIVE) { - throw new HiveException("Cannot vectorize IN() - common type is " + opTi); - } - if (((PrimitiveTypeInfo)colTi).getPrimitiveCategory() != - ((PrimitiveTypeInfo)opTi).getPrimitiveCategory()) { - throw new HiveException("Cannot vectorize IN() - casting a column is not supported. " - + "Column type is " + colTi + " but the common type is " + opTi); - } - return colTi; - } else { - // The children type should be converted to return type - commonType = returnType; - } - return commonType; - } - - /** - * Add a cast expression to the expression tree if needed. The output of child expressions of a given UDF might - * need a cast if their return type is different from the return type of the UDF. - * - * @param genericUDF The given UDF - * @param children Child expressions of the UDF that might require a cast. - * @param returnType The return type of the UDF. - * @return List of child expressions added with cast. - */ - private List getChildExpressionsWithImplicitCast(GenericUDF genericUDF, - List children, TypeInfo returnType) throws HiveException { - - if (isCustomUDF(genericUDF.getUdfName())) { - // no implicit casts possible - return children; - } - - if (isExcludedFromCast(genericUDF)) { - // No implicit cast needed - return children; - } - if (children == null) { - return null; - } - - TypeInfo commonType = getCommonTypeForChildExpressions(genericUDF, children, returnType); - - if (commonType == null) { - - // Couldn't determine common type, don't cast - return children; - } - - List childrenWithCasts = new ArrayList(); - boolean atleastOneCastNeeded = false; - if (genericUDF instanceof GenericUDFElt) { - int i = 0; - for (ExprNodeDesc child : children) { - TypeInfo castType = commonType; - if (i++ == 0) { - castType = isIntFamily(child.getTypeString()) ? 
child.getTypeInfo() : TypeInfoFactory.intTypeInfo; - } - ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, castType); - if (castExpression != null) { - atleastOneCastNeeded = true; - childrenWithCasts.add(castExpression); - } else { - childrenWithCasts.add(child); - } - } - } else { - for (ExprNodeDesc child : children) { - ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType); - if (castExpression != null) { - atleastOneCastNeeded = true; - childrenWithCasts.add(castExpression); - } else { - childrenWithCasts.add(child); - } - } - } - if (atleastOneCastNeeded) { - return childrenWithCasts; - } else { - return children; - } - } - - private boolean isExcludedFromCast(GenericUDF genericUDF) { - boolean ret = castExpressionUdfs.contains(genericUDF.getClass()) - || (genericUDF instanceof GenericUDFRound) || (genericUDF instanceof GenericUDFBetween); - - if (ret) { - return ret; - } - - if (genericUDF instanceof GenericUDFBridge) { - Class udfClass = ((GenericUDFBridge) genericUDF).getUdfClass(); - return castExpressionUdfs.contains(udfClass) - || UDFSign.class.isAssignableFrom(udfClass); - } - return false; - } - - /** - * Creates a DecimalTypeInfo object with appropriate precision and scale for the given - * inputTypeInfo. - */ - private TypeInfo updatePrecision(TypeInfo inputTypeInfo, DecimalTypeInfo returnType) { - if (!(inputTypeInfo instanceof PrimitiveTypeInfo)) { - return returnType; - } - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) inputTypeInfo; - int precision = getPrecisionForType(ptinfo); - // TODO: precision and scale would be practically invalid for string conversion (38,38) - int scale = HiveDecimalUtils.getScaleForType(ptinfo); - return new DecimalTypeInfo(precision, scale); - } - - /** - * The GenericUDFs might need their children output to be cast to the given castType. - * This method returns a cast expression that would achieve the required casting. - */ - private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType) - throws HiveException { - TypeInfo inputTypeInfo = child.getTypeInfo(); - String inputTypeString = inputTypeInfo.getTypeName(); - String castTypeString = castType.getTypeName(); - - if (inputTypeString.equals(castTypeString)) { - // Nothing to be done - return null; - } - boolean inputTypeDecimal = false; - boolean castTypeDecimal = false; - if (decimalTypePattern.matcher(inputTypeString).matches()) { - inputTypeDecimal = true; - } - if (decimalTypePattern.matcher(castTypeString).matches()) { - castTypeDecimal = true; - } - - if (castTypeDecimal && !inputTypeDecimal) { - if (needsImplicitCastForDecimal(udf)) { - // Cast the input to decimal - // If castType is decimal, try not to lose precision for numeric types. 
- castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType); - GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal(); - castToDecimalUDF.setTypeInfo(castType); - List children = new ArrayList(); - children.add(child); - ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children); - return desc; - } - } else if (!castTypeDecimal && inputTypeDecimal) { - if (needsImplicitCastForDecimal(udf)) { - // Cast decimal input to returnType - GenericUDF genericUdf = getGenericUDFForCast(castType); - List children = new ArrayList(); - children.add(child); - ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children); - return desc; - } - } else { - - // Casts to exact types including long to double etc. are needed in some special cases. - if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl - || udf instanceof GenericUDFElt) { - GenericUDF genericUdf = getGenericUDFForCast(castType); - List children = new ArrayList(); - children.add(child); - ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children); - return desc; - } - } - return null; - } - - private int getPrecisionForType(PrimitiveTypeInfo typeInfo) { - if (isFloatFamily(typeInfo.getTypeName())) { - return HiveDecimal.MAX_PRECISION; - } - return HiveDecimalUtils.getPrecisionForType(typeInfo); - } - - public static GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException { - UDF udfClass = null; - GenericUDF genericUdf = null; - switch (((PrimitiveTypeInfo) castType).getPrimitiveCategory()) { - case BYTE: - udfClass = new UDFToByte(); - break; - case SHORT: - udfClass = new UDFToShort(); - break; - case INT: - udfClass = new UDFToInteger(); - break; - case LONG: - udfClass = new UDFToLong(); - break; - case FLOAT: - udfClass = new UDFToFloat(); - break; - case DOUBLE: - udfClass = new UDFToDouble(); - break; - case STRING: - genericUdf = new GenericUDFToString(); - break; - case CHAR: - genericUdf = new GenericUDFToChar(); - break; - case VARCHAR: - genericUdf = new GenericUDFToVarchar(); - break; - case BOOLEAN: - udfClass = new UDFToBoolean(); - break; - case DATE: - genericUdf = new GenericUDFToDate(); - break; - case TIMESTAMP: - genericUdf = new GenericUDFTimestamp(); - break; - case INTERVAL_YEAR_MONTH: - genericUdf = new GenericUDFToIntervalYearMonth(); - break; - case INTERVAL_DAY_TIME: - genericUdf = new GenericUDFToIntervalDayTime(); - break; - case BINARY: - genericUdf = new GenericUDFToBinary(); - break; - case DECIMAL: - genericUdf = new GenericUDFToDecimal(); - break; - case VOID: - case UNKNOWN: - // fall-through to throw exception, its not expected for execution to reach here. - break; - } - if (genericUdf == null) { - if (udfClass == null) { - throw new HiveException("Could not add implicit cast for type "+castType.getTypeName()); - } - GenericUDFBridge genericUDFBridge = new GenericUDFBridge(); - genericUDFBridge.setUdfClassName(udfClass.getClass().getName()); - genericUDFBridge.setUdfName(udfClass.getClass().getSimpleName()); - genericUdf = genericUDFBridge; - } - if (genericUdf instanceof SettableUDF) { - ((SettableUDF) genericUdf).setTypeInfo(castType); - } - return genericUdf; - } - - /* Return true if this is one of a small set of functions for which - * it is significantly easier to use the old code path in vectorized - * mode instead of implementing a new, optimized VectorExpression. 
- * - * Depending on performance requirements and frequency of use, these - * may be implemented in the future with an optimized VectorExpression. - */ - public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr, - VectorExpressionDescriptor.Mode mode) { - GenericUDF gudf = expr.getGenericUDF(); - if (gudf instanceof GenericUDFBridge) { - GenericUDFBridge bridge = (GenericUDFBridge) gudf; - Class udfClass = bridge.getUdfClass(); - if (udfClass.equals(UDFHex.class) - || udfClass.equals(UDFRegExpExtract.class) - || udfClass.equals(UDFRegExpReplace.class) - || udfClass.equals(UDFConv.class) - || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr)) - || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) - || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr))) { - return true; - } - } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) - - /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because - * of their complexity and generality. In the future, variations of these - * can be optimized to run faster for the vectorized code path. For example, - * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END - * is an example of a GenericUDFCase that has all constant arguments - * except for the first argument. This is probably a common case and a - * good candidate for a fast, special-purpose VectorExpression. Then - * the UDF Adaptor code path could be used as a catch-all for - * non-optimized general cases. - */ - || gudf instanceof GenericUDFCase - || gudf instanceof GenericUDFWhen) { - return true; - } else if ((gudf instanceof GenericUDFToString - || gudf instanceof GenericUDFToChar - || gudf instanceof GenericUDFToVarchar) && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } else if (gudf instanceof GenericUDFBetween && (mode == VectorExpressionDescriptor.Mode.PROJECTION)) { - // between has 4 args here, but can be vectorized like this - return true; - } - return false; - } - - public static boolean isCastToIntFamily(Class udfClass) { - return udfClass.equals(UDFToByte.class) - || udfClass.equals(UDFToShort.class) - || udfClass.equals(UDFToInteger.class) - || udfClass.equals(UDFToLong.class); - - // Boolean is purposely excluded. - } - - public static boolean isCastToFloatFamily(Class udfClass) { - return udfClass.equals(UDFToDouble.class) - || udfClass.equals(UDFToFloat.class); - } - - // Return the type string of the first argument (argument 0). - public static String arg0Type(ExprNodeGenericFuncDesc expr) { - String type = expr.getChildren().get(0).getTypeString(); - return type; - } - - // Return true if this is a custom UDF or custom GenericUDF. - // This two functions are for use only in the planner. It will fail in a task. - public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) { - return isCustomUDF(expr.getFuncText()); - } - - private static boolean isCustomUDF(String udfName) { - if (udfName == null) { - return false; - } - FunctionInfo funcInfo; - try { - funcInfo = FunctionRegistry.getFunctionInfo(udfName); - } catch (SemanticException e) { - LOG.warn("Failed to load " + udfName, e); - funcInfo = null; - } - if (funcInfo == null) { - return false; - } - boolean isNativeFunc = funcInfo.isNative(); - return !isNativeFunc; - } - - /** - * Handles only the special cases of cast/+ve/-ve operator on a constant. 
- * @param exprDesc - * @return The same expression if no evaluation done, else return the constant - * expression. - * @throws HiveException - */ - ExprNodeDesc evaluateCastOnConstants(ExprNodeDesc exprDesc) throws HiveException { - if (!(exprDesc instanceof ExprNodeGenericFuncDesc)) { - return exprDesc; - } - - if (exprDesc.getChildren() == null || (exprDesc.getChildren().size() != 1) ) { - return exprDesc; - } - - ExprNodeConstantDesc foldedChild = null; - if (!( exprDesc.getChildren().get(0) instanceof ExprNodeConstantDesc)) { - - // try recursive folding - ExprNodeDesc expr = evaluateCastOnConstants(exprDesc.getChildren().get(0)); - if (expr instanceof ExprNodeConstantDesc) { - foldedChild = (ExprNodeConstantDesc) expr; - } - } else { - foldedChild = (ExprNodeConstantDesc) exprDesc.getChildren().get(0); - } - - if (foldedChild == null) { - return exprDesc; - } - - ObjectInspector childoi = foldedChild.getWritableObjectInspector(); - GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF(); - - // Only evaluate +ve/-ve or cast on constant or recursive casting. - if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive || - castExpressionUdfs.contains(gudf.getClass()) - || ((gudf instanceof GenericUDFBridge) - && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) { - ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprDesc); - ObjectInspector output = evaluator.initialize(childoi); - Object constant = evaluator.evaluate(null); - Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output); - return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java); - } - - return exprDesc; - } - - /* For cast on constant operator in all members of the input list and return new list - * containing results. - */ - private List evaluateCastOnConstants(List childExpr) - throws HiveException { - List evaluatedChildren = new ArrayList(); - if (childExpr != null) { - for (ExprNodeDesc expr : childExpr) { - expr = this.evaluateCastOnConstants(expr); - evaluatedChildren.add(expr); - } - } - return evaluatedChildren; - } - - private VectorExpression getConstantVectorExpression(Object constantValue, TypeInfo typeInfo, - VectorExpressionDescriptor.Mode mode) throws HiveException { - String typeName = typeInfo.getTypeName(); - VectorExpressionDescriptor.ArgumentType vectorArgType = - VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(typeName); - if (vectorArgType == VectorExpressionDescriptor.ArgumentType.NONE) { - throw new HiveException("No vector argument type for type name " + typeName); - } - int outCol = -1; - if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - outCol = ocm.allocateOutputColumn(typeInfo); - } - if (constantValue == null) { - return new ConstantVectorExpression(outCol, typeInfo, true); - } - - // Boolean is special case. 
- if (typeName.equalsIgnoreCase("boolean")) { - if (mode == VectorExpressionDescriptor.Mode.FILTER) { - if (((Boolean) constantValue).booleanValue()) { - return new FilterConstantBooleanVectorExpression(1); - } else { - return new FilterConstantBooleanVectorExpression(0); - } - } else { - if (((Boolean) constantValue).booleanValue()) { - return new ConstantVectorExpression(outCol, 1, typeInfo); - } else { - return new ConstantVectorExpression(outCol, 0, typeInfo); - } - } - } - - switch (vectorArgType) { - case INT_FAMILY: - return new ConstantVectorExpression(outCol, ((Number) constantValue).longValue(), typeInfo); - case DATE: - return new ConstantVectorExpression(outCol, DateWritableV2.dateToDays((Date) constantValue), typeInfo); - case TIMESTAMP: - return new ConstantVectorExpression(outCol, - ((org.apache.hadoop.hive.common.type.Timestamp) constantValue).toSqlTimestamp(), typeInfo); - case INTERVAL_YEAR_MONTH: - return new ConstantVectorExpression(outCol, - ((HiveIntervalYearMonth) constantValue).getTotalMonths(), typeInfo); - case INTERVAL_DAY_TIME: - return new ConstantVectorExpression(outCol, (HiveIntervalDayTime) constantValue, typeInfo); - case FLOAT_FAMILY: - return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue(), typeInfo); - case DECIMAL: - return new ConstantVectorExpression(outCol, (HiveDecimal) constantValue, typeInfo); - case STRING: - return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes(), typeInfo); - case CHAR: - return new ConstantVectorExpression(outCol, ((HiveChar) constantValue), typeInfo); - case VARCHAR: - return new ConstantVectorExpression(outCol, ((HiveVarchar) constantValue), typeInfo); - default: - throw new HiveException("Unsupported constant type: " + typeName + ", object class " + constantValue.getClass().getSimpleName()); - } - } - - private VectorExpression getDynamicValueVectorExpression(ExprNodeDynamicValueDesc dynamicValueExpr, - VectorExpressionDescriptor.Mode mode) throws HiveException { - String typeName = dynamicValueExpr.getTypeInfo().getTypeName(); - VectorExpressionDescriptor.ArgumentType vectorArgType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(typeName); - if (vectorArgType == VectorExpressionDescriptor.ArgumentType.NONE) { - throw new HiveException("No vector argument type for type name " + typeName); - } - int outCol = -1; - if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - outCol = ocm.allocateOutputColumn(dynamicValueExpr.getTypeInfo()); - } - - return new DynamicValueVectorExpression(outCol, dynamicValueExpr.getTypeInfo(), dynamicValueExpr.getDynamicValue()); - } - - /** - * Used as a fast path for operations that don't modify their input, like unary + - * and casting boolean to long. IdentityExpression and its children are always - * projections. 
- */ - private VectorExpression getIdentityExpression(List childExprList) - throws HiveException { - ExprNodeDesc childExpr = childExprList.get(0); - int identityCol; - TypeInfo identityTypeInfo; - DataTypePhysicalVariation identityDataTypePhysicalVariation; - VectorExpression v1 = null; - if (childExpr instanceof ExprNodeGenericFuncDesc) { - v1 = getVectorExpression(childExpr); - identityCol = v1.getOutputColumnNum(); - identityTypeInfo = v1.getOutputTypeInfo(); - identityDataTypePhysicalVariation = v1.getOutputDataTypePhysicalVariation(); - } else if (childExpr instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; - identityCol = getInputColumnIndex(colDesc.getColumn()); - identityTypeInfo = colDesc.getTypeInfo(); - - // CONSIDER: Validation of type information - - identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol); - } else { - throw new HiveException("Expression not supported: "+childExpr); - } - - VectorExpression ve = new IdentityExpression(identityCol); - - if (v1 != null) { - ve.setChildExpressions(new VectorExpression [] {v1}); - } - - ve.setInputTypeInfos(identityTypeInfo); - ve.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation); - - ve.setOutputTypeInfo(identityTypeInfo); - ve.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation); - - return ve; - } - - - private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException { - if (exprNodeDesc instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc); - DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); - return (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64); - } else if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) { - - // Is the result Decimal64 precision? - TypeInfo returnType = exprNodeDesc.getTypeInfo(); - if (!checkTypeInfoForDecimal64(returnType)) { - return false; - } - DecimalTypeInfo returnDecimalType = (DecimalTypeInfo) returnType; - - GenericUDF udf = ((ExprNodeGenericFuncDesc) exprNodeDesc).getGenericUDF(); - Class udfClass = udf.getClass(); - - // We have a class-level annotation that says whether the UDF's vectorization expressions - // support Decimal64. - VectorizedExpressionsSupportDecimal64 annotation = - AnnotationUtils.getAnnotation(udfClass, VectorizedExpressionsSupportDecimal64.class); - if (annotation == null) { - return false; - } - - // Carefully check the children to make sure they are Decimal64. - List children = exprNodeDesc.getChildren(); - for (ExprNodeDesc childExprNodeDesc : children) { - - // Some cases were converted before calling getVectorExpressionForUdf. - // So, emulate those cases first. - - if (childExprNodeDesc instanceof ExprNodeConstantDesc) { - DecimalTypeInfo childDecimalTypeInfo = - decimalTypeFromCastToDecimal(childExprNodeDesc, returnDecimalType); - if (childDecimalTypeInfo == null) { - return false; - } - if (!checkTypeInfoForDecimal64(childDecimalTypeInfo)) { - return false; - } - continue; - } - - // Otherwise, recurse. 
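The recursion invoked below walks the whole expression tree: a node qualifies for Decimal64 only if its own output qualifies and every child does too. A rough stand-alone sketch of that shape, using a hypothetical node type rather than Hive's ExprNodeDesc (the real check also consults DecimalTypeInfo and a UDF class annotation):

import java.util.List;

public class Decimal64TreeCheckSketch {
  final boolean outputQualifies;                    // stands in for the Decimal64 output check
  final List<Decimal64TreeCheckSketch> children;    // empty for leaves (columns, constants)

  Decimal64TreeCheckSketch(boolean outputQualifies, List<Decimal64TreeCheckSketch> children) {
    this.outputQualifies = outputQualifies;
    this.children = children;
  }

  // A node qualifies only if its own output qualifies and all children do.
  static boolean qualifies(Decimal64TreeCheckSketch n) {
    if (!n.outputQualifies) {
      return false;
    }
    for (Decimal64TreeCheckSketch child : n.children) {
      if (!qualifies(child)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Decimal64TreeCheckSketch leaf = new Decimal64TreeCheckSketch(true, List.of());
    Decimal64TreeCheckSketch bad = new Decimal64TreeCheckSketch(false, List.of());
    System.out.println(qualifies(new Decimal64TreeCheckSketch(true, List.of(leaf))));  // true
    System.out.println(qualifies(new Decimal64TreeCheckSketch(true, List.of(bad))));   // false
  }
}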
- if (!checkExprNodeDescForDecimal64(childExprNodeDesc)) {
- return false;
- }
- }
- return true;
- } else if (exprNodeDesc instanceof ExprNodeConstantDesc) {
- return checkTypeInfoForDecimal64(exprNodeDesc.getTypeInfo());
- }
- return false;
- }
-
- private boolean checkTypeInfoForDecimal64(TypeInfo typeInfo) {
- if (typeInfo instanceof DecimalTypeInfo) {
- DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
- return HiveDecimalWritable.isPrecisionDecimal64(decimalTypeInfo.precision());
- }
- return false;
- }
-
- public boolean haveCandidateForDecimal64VectorExpression(int numChildren,
- List childExpr, TypeInfo returnType) throws HiveException {
-
- // For now, just 2 Decimal64 inputs and a Decimal64 or boolean output.
- return (numChildren == 2 &&
- checkExprNodeDescForDecimal64(childExpr.get(0)) &&
- checkExprNodeDescForDecimal64(childExpr.get(1)) &&
- (checkTypeInfoForDecimal64(returnType) ||
- returnType.equals(TypeInfoFactory.booleanTypeInfo)));
- }
-
- private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUdf,
- Class udfClass, List childExpr, int numChildren,
- VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
-
- ExprNodeDesc child1 = childExpr.get(0);
- ExprNodeDesc child2 = childExpr.get(1);
-
- DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) child1.getTypeInfo();
- DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) child2.getTypeInfo();
-
- DataTypePhysicalVariation dataTypePhysicalVariation1 = DataTypePhysicalVariation.DECIMAL_64;
- DataTypePhysicalVariation dataTypePhysicalVariation2 = DataTypePhysicalVariation.DECIMAL_64;
-
- final int scale1 = decimalTypeInfo1.scale();
- final int scale2 = decimalTypeInfo2.scale();
-
- VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
- builder.setNumArguments(numChildren);
- builder.setMode(mode);
-
- boolean isColumnScaleEstablished = false;
- int columnScale = 0;
- boolean hasScalar = false;
- builder.setArgumentType(0, ArgumentType.DECIMAL_64);
- if (child1 instanceof ExprNodeGenericFuncDesc ||
- child1 instanceof ExprNodeColumnDesc) {
- builder.setInputExpressionType(0, InputExpressionType.COLUMN);
- isColumnScaleEstablished = true;
- columnScale = scale1;
- } else if (child1 instanceof ExprNodeConstantDesc) {
- if (isNullConst(child1)) {
-
- // Cannot handle NULL scalar parameter.
- return null;
- }
- hasScalar = true;
- builder.setInputExpressionType(0, InputExpressionType.SCALAR);
- } else {
-
- // Currently, only functions, columns, and scalars supported.
- return null;
- }
-
- builder.setArgumentType(1, ArgumentType.DECIMAL_64);
- if (child2 instanceof ExprNodeGenericFuncDesc ||
- child2 instanceof ExprNodeColumnDesc) {
- builder.setInputExpressionType(1, InputExpressionType.COLUMN);
- if (!isColumnScaleEstablished) {
- isColumnScaleEstablished = true;
- columnScale = scale2;
- } else if (columnScale != scale2) {
-
- // We only support Decimal64 on 2 columns when they have the same scale.
- return null;
- }
- } else if (child2 instanceof ExprNodeConstantDesc) {
- // Cannot have SCALAR, SCALAR.
- if (!isColumnScaleEstablished) {
- return null;
- }
- if (isNullConst(child2)) {
-
- // Cannot handle NULL scalar parameter.
- return null;
- }
- hasScalar = true;
- builder.setInputExpressionType(1, InputExpressionType.SCALAR);
- } else {
-
- // Currently, only functions, columns, and scalars supported.
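The precision test in checkTypeInfoForDecimal64 above rests on a simple fact: any decimal with at most 18 digits of precision has an unscaled value below Long.MAX_VALUE (9223372036854775807, which has 19 digits), so it fits in a signed 64-bit long once the scale is fixed. A stand-alone illustration of both the fit test and the scaled-long encoding (a rough analogue of the real serialize64, which also handles overflow and rounding rules):

import java.math.BigDecimal;
import java.math.RoundingMode;

public class Decimal64Sketch {
  // Precision <= 18 always fits: the largest 18-digit unscaled value,
  // 999999999999999999, is below Long.MAX_VALUE.
  static boolean fitsDecimal64(BigDecimal d) {
    return d.precision() <= 18;
  }

  // A Decimal64 value is just the unscaled long after rescaling to the
  // column's scale, e.g. 12.5 at scale 2 -> 1250.
  static long toDecimal64(BigDecimal d, int columnScale) {
    return d.setScale(columnScale, RoundingMode.UNNECESSARY)
        .unscaledValue().longValueExact();  // throws if it does not fit in 64 bits
  }

  public static void main(String[] args) {
    System.out.println(fitsDecimal64(new BigDecimal("9999999999999999999")));  // false (19 digits)
    System.out.println(toDecimal64(new BigDecimal("12.5"), 2));                // 1250
  }
}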
- return null; - } - - VectorExpressionDescriptor.Descriptor descriptor = builder.build(); - Class vectorClass = - this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions); - if (vectorClass == null) { - return null; - } - - VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass); - - /* - * Custom build arguments. - */ - - List children = new ArrayList(); - Object[] arguments = new Object[numChildren]; - - for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - if (child instanceof ExprNodeGenericFuncDesc) { - VectorExpression vChild = getVectorExpression(child, childrenMode); - children.add(vChild); - arguments[i] = vChild.getOutputColumnNum(); - } else if (child instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); - if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) { - - VectorExpression filterExpr = - getFilterOnBooleanColumnExpression((ExprNodeColumnDesc) child, colIndex); - if (filterExpr == null) { - return null; - } - - children.add(filterExpr); - } - arguments[i] = colIndex; - } else { - Preconditions.checkState(child instanceof ExprNodeConstantDesc); - ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) child; - HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue(); - if (hiveDecimal.scale() > columnScale) { - - // For now, bail out on decimal constants with larger scale than column scale. - return null; - } - final long decimal64Scalar = new HiveDecimalWritable(hiveDecimal).serialize64(columnScale); - arguments[i] = decimal64Scalar; - } - } - - /* - * Instantiate Decimal64 vector expression. - * - * The instantiateExpression method sets the output column and type information. - */ - VectorExpression vectorExpression = - instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments); - if (vectorExpression == null) { - handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments); - } - - vectorExpression.setInputTypeInfos(decimalTypeInfo1, decimalTypeInfo2); - vectorExpression.setInputDataTypePhysicalVariations(dataTypePhysicalVariation1, dataTypePhysicalVariation2); - - if ((vectorExpression != null) && !children.isEmpty()) { - vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0])); - } - - return vectorExpression; - } - - private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, - Class udfClass, List childExpr, VectorExpressionDescriptor.Mode mode, - TypeInfo returnType) throws HiveException { - - int numChildren = (childExpr == null) ? 0 : childExpr.size(); - - if (numChildren > 2 && genericUdf != null && mode == VectorExpressionDescriptor.Mode.FILTER && - ((genericUdf instanceof GenericUDFOPOr) || (genericUdf instanceof GenericUDFOPAnd))) { - - // Special case handling for Multi-OR and Multi-AND. 
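The multi-AND/OR special case handled below combines boolean-valued child filters; conceptually, an AND applies the children in sequence so each filter only examines rows the earlier ones kept. A toy version over an explicit selected-row list (not Hive's VectorizedRowBatch selection vector, but the same narrowing idea):

import java.util.ArrayList;
import java.util.List;
import java.util.function.IntPredicate;

public class MultiAndFilterSketch {
  // AND of several row predicates: each filter narrows the surviving rows.
  static List<Integer> filterAnd(List<Integer> selected, IntPredicate... filters) {
    List<Integer> current = selected;
    for (IntPredicate f : filters) {
      List<Integer> next = new ArrayList<>();
      for (int row : current) {
        if (f.test(row)) {
          next.add(row);
        }
      }
      current = next;  // later filters only examine rows kept so far
    }
    return current;
  }

  public static void main(String[] args) {
    List<Integer> rows = List.of(0, 1, 2, 3, 4, 5);
    System.out.println(filterAnd(rows, r -> r % 2 == 0, r -> r > 1));  // [2, 4]
  }
}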
- - for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - String childTypeString = child.getTypeString(); - if (childTypeString == null) { - throw new HiveException("Null child type name string"); - } - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString); - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); - if (columnVectorType != ColumnVector.Type.LONG){ - return null; - } - if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) { - return null; - } - } - Class vclass; - if (genericUdf instanceof GenericUDFOPOr) { - vclass = FilterExprOrExpr.class; - } else if (genericUdf instanceof GenericUDFOPAnd) { - vclass = FilterExprAndExpr.class; - } else { - throw new RuntimeException("Unexpected multi-child UDF"); - } - VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass); - return createVectorExpression(vclass, childExpr, childrenMode, returnType); - } - if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) { - return null; - } - - // Should we intercept here for a possible Decimal64 vector expression class? - if (haveCandidateForDecimal64VectorExpression(numChildren, childExpr, returnType)) { - VectorExpression result = getDecimal64VectorExpressionForUdf(genericUdf, udfClass, - childExpr, numChildren, mode, returnType); - if (result != null) { - return result; - } - // Otherwise, fall through and proceed with non-Decimal64 vector expression classes... - } - - VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder(); - builder.setNumArguments(numChildren); - builder.setMode(mode); - for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - TypeInfo childTypeInfo = child.getTypeInfo(); - String childTypeString = childTypeInfo.toString(); - if (childTypeString == null) { - throw new HiveException("Null child type name string"); - } - String undecoratedTypeName = getUndecoratedName(childTypeString); - if (undecoratedTypeName == null) { - throw new HiveException("No match for type string " + childTypeString + " from undecorated type name method"); - } - builder.setArgumentType(i, undecoratedTypeName); - if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc) - || (child instanceof ExprNodeFieldDesc)) { - builder.setInputExpressionType(i, InputExpressionType.COLUMN); - } else if (child instanceof ExprNodeConstantDesc) { - if (isNullConst(child)) { - // Cannot handle NULL scalar parameter. 
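The builder loop above encodes each child's argument type and shape (column, scalar, dynamic value), and the descriptor built from it is resolved to a concrete vectorized class via vMap below; a missing entry means the expression falls back to row mode. A toy registry illustrating that lookup idea, with entirely made-up key format and entries:

import java.util.HashMap;
import java.util.Map;

public class DescriptorLookupSketch {
  static final Map<String, Class<?>> REGISTRY = new HashMap<>();
  static {
    // Hypothetical entry: key encodes mode, argument types, and input shapes.
    REGISTRY.put("FILTER:long,long:COLUMN,SCALAR", Long.class /* stand-in for an expression class */);
  }

  // null result => no vectorized implementation; caller falls back to row mode.
  static Class<?> lookup(String mode, String argTypes, String inputShapes) {
    return REGISTRY.get(mode + ":" + argTypes + ":" + inputShapes);
  }

  public static void main(String[] args) {
    System.out.println(lookup("FILTER", "long,long", "COLUMN,SCALAR"));      // class java.lang.Long
    System.out.println(lookup("FILTER", "double,double", "COLUMN,SCALAR"));  // null
  }
}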
- return null; - } - builder.setInputExpressionType(i, InputExpressionType.SCALAR); - } else if (child instanceof ExprNodeDynamicValueDesc) { - builder.setInputExpressionType(i, InputExpressionType.DYNAMICVALUE); - } else { - throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); - } - } - VectorExpressionDescriptor.Descriptor descriptor = builder.build(); - Class vclass = - this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions); - if (vclass == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("No vector udf found for "+udfClass.getSimpleName() + ", descriptor: "+descriptor); - } - return null; - } - VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass); - return createVectorExpression(vclass, childExpr, childrenMode, returnType); - } - - private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeInfo resultTypeInfo) - throws HiveException { - Object [] conversionArgs = new Object[1]; - conversionArgs[0] = colIndex; - VectorExpression vectorExpression = - instantiateExpression( - ConvertDecimal64ToDecimal.class, - resultTypeInfo, - DataTypePhysicalVariation.NONE, - conversionArgs); - if (vectorExpression == null) { - handleCouldNotInstantiateVectorExpression( - ConvertDecimal64ToDecimal.class, resultTypeInfo, DataTypePhysicalVariation.NONE, - conversionArgs); - } - - vectorExpression.setInputTypeInfos(resultTypeInfo); - vectorExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.DECIMAL_64); - - return vectorExpression; - } - - public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression) - throws HiveException { - - VectorExpression wrapExpression = createDecimal64ToDecimalConversion( - inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo()); - if (inputExpression instanceof IdentityExpression) { - return wrapExpression; - } - - // CONCERN: Leaking scratch column? - VectorExpression[] child = new VectorExpression[1]; - child[0] = inputExpression; - wrapExpression.setChildExpressions(child); - - return wrapExpression; - } - - private VectorExpression createVectorExpression(Class vectorClass, - List childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType) throws HiveException { - int numChildren = childExpr == null ? 0: childExpr.size(); - - TypeInfo[] inputTypeInfos = new TypeInfo[numChildren]; - DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[numChildren]; - - List children = new ArrayList(); - Object[] arguments = new Object[numChildren]; - - for (int i = 0; i < numChildren; i++) { - ExprNodeDesc child = childExpr.get(i); - TypeInfo childTypeInfo = child.getTypeInfo(); - - inputTypeInfos[i] = childTypeInfo; - inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE; // Assume. - - if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeFieldDesc)) { - VectorExpression vChild = getVectorExpression(child, childrenMode); - children.add(vChild); - arguments[i] = vChild.getOutputColumnNum(); - - // Update. - inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation(); - } else if (child instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); - - // CONSIDER: Validate type information - - if (childTypeInfo instanceof DecimalTypeInfo) { - - // In this method, we must only process non-Decimal64 column vectors. 
- // Convert Decimal64 columns to regular decimal. - DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); - if (dataTypePhysicalVariation != null && dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - - // FUTURE: Can we reuse this conversion? - VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo); - children.add(vChild); - arguments[i] = vChild.getOutputColumnNum(); - - // Update. - inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation(); - continue; - } - } - if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) { - - // In filter mode, the column must be a boolean - SelectColumnIsTrue selectColumnIsTrue = new SelectColumnIsTrue(colIndex); - - selectColumnIsTrue.setInputTypeInfos(childTypeInfo); - selectColumnIsTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); - - children.add(selectColumnIsTrue); - } - arguments[i] = colIndex; - } else if (child instanceof ExprNodeConstantDesc) { - Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child); - arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue; - } else if (child instanceof ExprNodeDynamicValueDesc) { - arguments[i] = ((ExprNodeDynamicValueDesc) child).getDynamicValue(); - } else { - throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); - } - } - VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments); - if (vectorExpression == null) { - handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments); - } - - vectorExpression.setInputTypeInfos(inputTypeInfos); - vectorExpression.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations); - - if ((vectorExpression != null) && !children.isEmpty()) { - vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0])); - } - - for (VectorExpression ve : children) { - ocm.freeOutputColumn(ve.getOutputColumnNum()); - } - - return vectorExpression; - } - - private void handleCouldNotInstantiateVectorExpression(Class vectorClass, TypeInfo returnType, - DataTypePhysicalVariation dataTypePhysicalVariation, Object[] arguments) throws HiveException { - String displayString = "Could not instantiate vector expression class " + vectorClass.getName() + - " for arguments " + Arrays.toString(arguments) + " return type " + - VectorExpression.getTypeName(returnType, dataTypePhysicalVariation); - throw new HiveException(displayString); - } - - private VectorExpressionDescriptor.Mode getChildrenMode(VectorExpressionDescriptor.Mode mode, Class udf) { - if (mode.equals(VectorExpressionDescriptor.Mode.FILTER) && (udf.equals(GenericUDFOPAnd.class) || udf.equals(GenericUDFOPOr.class))) { - return VectorExpressionDescriptor.Mode.FILTER; - } - return VectorExpressionDescriptor.Mode.PROJECTION; - } - - private String getNewInstanceArgumentString(Object [] args) { - if (args == null) { - return "arguments: NULL"; - } - ArrayList argClasses = new ArrayList(); - for (Object obj : args) { - argClasses.add(obj.getClass().getSimpleName()); - } - return "arguments: " + Arrays.toString(args) + ", argument classes: " + argClasses.toString(); - } - - private static final int STACK_LENGTH_LIMIT = 15; - - public static String getStackTraceAsSingleLine(Throwable e) { - StringBuilder sb = new StringBuilder(); - 
sb.append(e);
- sb.append(" stack trace: ");
- StackTraceElement[] stackTrace = e.getStackTrace();
- int length = stackTrace.length;
- boolean isTruncated = false;
- if (length > STACK_LENGTH_LIMIT) {
- length = STACK_LENGTH_LIMIT;
- isTruncated = true;
- }
- for (int i = 0; i < length; i++) {
- if (i > 0) {
- sb.append(", ");
- }
- sb.append(stackTrace[i]);
- }
- if (isTruncated) {
- sb.append(", ...");
- }
-
- // Attempt to clean up stack trace elements that vary by VM.
- String cleaned = sb.toString().replaceAll("GeneratedConstructorAccessor[0-9]*", "GeneratedConstructorAccessor");
-
- return cleaned;
- }
-
- public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTypeInfo,
- DataTypePhysicalVariation returnDataTypePhysicalVariation, Object...args)
- throws HiveException {
- VectorExpression ve = null;
- Constructor ctor = getConstructor(vclass);
- int numParams = ctor.getParameterTypes().length;
- int argsLength = (args == null) ? 0 : args.length;
- if (numParams == 0) {
- try {
- ve = (VectorExpression) ctor.newInstance();
- } catch (Exception ex) {
- throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " +
- getStackTraceAsSingleLine(ex));
- }
- } else if (numParams == argsLength) {
- try {
- ve = (VectorExpression) ctor.newInstance(args);
- } catch (Exception ex) {
- throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " +
- getStackTraceAsSingleLine(ex));
- }
- } else if (numParams == argsLength + 1) {
- // An additional argument is needed, which is the output column.
- Object [] newArgs = null;
- try {
- if (returnTypeInfo == null) {
- throw new HiveException("Missing output type information");
- }
- String returnTypeName = returnTypeInfo.getTypeName();
- returnTypeName = VectorizationContext.mapTypeNameSynonyms(returnTypeName);
-
- // Special handling for decimal because decimal types need scale and precision parameters.
- // This special handling should be avoided by using returnType uniformly for all cases.
- final int outputColumnNum =
- ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation);
-
- newArgs = Arrays.copyOf(args, numParams);
- newArgs[numParams-1] = outputColumnNum;
-
- ve = (VectorExpression) ctor.newInstance(newArgs);
-
- /*
- * Caller is responsible for setting children and input type information.
- */
- ve.setOutputTypeInfo(returnTypeInfo);
- ve.setOutputDataTypePhysicalVariation(returnDataTypePhysicalVariation);
-
- } catch (Exception ex) {
- throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " +
- getStackTraceAsSingleLine(ex));
- }
- }
- // Add maxLength parameter to UDFs that have CHAR or VARCHAR output.
- if (ve instanceof TruncStringOutput) {
- TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
- if (returnTypeInfo instanceof BaseCharTypeInfo) {
- BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnTypeInfo;
- truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
- }
- }
- return ve;
- }
-
- private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
- List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
-
- List castedChildren = evaluateCastOnConstants(childExpr);
- childExpr = castedChildren;
-
- // First handle special cases. If one of the special-case methods cannot handle it,
- // it returns null.
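The instantiateExpression logic above picks a constructor whose arity is either exactly the argument count or one more, the extra slot receiving a freshly allocated output column. A reduced, Hive-free sketch of that arity-matching trick (AddConst and all names are hypothetical):

import java.lang.reflect.Constructor;
import java.util.Arrays;

public class ArityMatchSketch {
  // Instantiate cls with args, appending outputColumn if the constructor
  // declares exactly one more parameter than we were given.
  static Object instantiate(Class<?> cls, int outputColumn, Object... args) throws Exception {
    Constructor<?> ctor = cls.getConstructors()[0];
    int numParams = ctor.getParameterCount();
    if (numParams == args.length) {
      return ctor.newInstance(args);
    } else if (numParams == args.length + 1) {
      Object[] newArgs = Arrays.copyOf(args, numParams);
      newArgs[numParams - 1] = outputColumn;  // output column goes last
      return ctor.newInstance(newArgs);
    }
    throw new IllegalArgumentException("No constructor of matching arity");
  }

  // Hypothetical expression: inputCol + value -> outputCol.
  public static class AddConst {
    public AddConst(Integer inputCol, Long value, Integer outputCol) { }
  }

  public static void main(String[] args) throws Exception {
    System.out.println(instantiate(AddConst.class, 7, 3, 100L).getClass().getSimpleName());  // AddConst
  }
}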
- VectorExpression ve = null; - if (udf instanceof GenericUDFBetween && mode == VectorExpressionDescriptor.Mode.FILTER) { - ve = getBetweenFilterExpression(childExpr, mode, returnType); - } else if (udf instanceof GenericUDFIn) { - ve = getInExpression(childExpr, mode, returnType); - } else if (udf instanceof GenericUDFIf) { - ve = getIfExpression((GenericUDFIf) udf, childExpr, mode, returnType); - } else if (udf instanceof GenericUDFWhen) { - ve = getWhenExpression(childExpr, mode, returnType); - } else if (udf instanceof GenericUDFOPPositive) { - ve = getIdentityExpression(childExpr); - } else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) { - - // Coalesce is a special case because it can take variable number of arguments. - // Nvl is a specialization of the Coalesce. - ve = getCoalesceExpression(childExpr, returnType); - } else if (udf instanceof GenericUDFElt) { - - // Elt is a special case because it can take variable number of arguments. - ve = getEltExpression(childExpr, returnType); - } else if (udf instanceof GenericUDFBridge) { - ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, - returnType); - } else if (udf instanceof GenericUDFToString) { - ve = getCastToString(childExpr, returnType); - } else if (udf instanceof GenericUDFToDecimal) { - ve = getCastToDecimal(childExpr, returnType); - } else if (udf instanceof GenericUDFToChar) { - ve = getCastToChar(childExpr, returnType); - } else if (udf instanceof GenericUDFToVarchar) { - ve = getCastToVarChar(childExpr, returnType); - } else if (udf instanceof GenericUDFTimestamp) { - ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); - } - if (ve != null) { - return ve; - } - // Now do a general lookup - Class udfClass = udf.getClass(); - boolean isSubstituted = false; - if (udf instanceof GenericUDFBridge) { - udfClass = ((GenericUDFBridge) udf).getUdfClass(); - isSubstituted = true; - } - - ve = getVectorExpressionForUdf((!isSubstituted ? 
udf : null), - udfClass, castedChildren, mode, returnType); - - return ve; - } - - private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf, - List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), childExpr, mode, returnType); - - // Replace with the milliseconds conversion - if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestamp) { - ve = createVectorExpression(CastMillisecondsLongToTimestamp.class, - childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } - - return ve; - } - - private void freeNonColumns(VectorExpression[] vectorChildren) { - if (vectorChildren == null) { - return; - } - for (VectorExpression v : vectorChildren) { - if (!(v instanceof IdentityExpression)) { - ocm.freeOutputColumn(v.getOutputColumnNum()); - } - } - } - - private VectorExpression getCoalesceExpression(List childExpr, TypeInfo returnType) - throws HiveException { - int[] inputColumns = new int[childExpr.size()]; - VectorExpression[] vectorChildren = - getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION); - - final int size = vectorChildren.length; - TypeInfo[] inputTypeInfos = new TypeInfo[size]; - DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[size]; - int i = 0; - for (VectorExpression ve : vectorChildren) { - inputColumns[i] = ve.getOutputColumnNum(); - inputTypeInfos[i] = ve.getOutputTypeInfo(); - inputDataTypePhysicalVariations[i++] = ve.getOutputDataTypePhysicalVariation(); - } - - final int outputColumnNum = ocm.allocateOutputColumn(returnType); - VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outputColumnNum); - - vectorCoalesce.setChildExpressions(vectorChildren); - - vectorCoalesce.setInputTypeInfos(inputTypeInfos); - vectorCoalesce.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations); - - vectorCoalesce.setOutputTypeInfo(returnType); - vectorCoalesce.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); - - freeNonColumns(vectorChildren); - return vectorCoalesce; - } - - private VectorExpression getEltExpression(List childExpr, TypeInfo returnType) - throws HiveException { - int[] inputColumns = new int[childExpr.size()]; - VectorExpression[] vectorChildren = - getVectorExpressions(childExpr, VectorExpressionDescriptor.Mode.PROJECTION); - - final int size = vectorChildren.length; - TypeInfo[] inputTypeInfos = new TypeInfo[size]; - DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[size]; - int i = 0; - for (VectorExpression ve : vectorChildren) { - inputColumns[i] = ve.getOutputColumnNum(); - inputTypeInfos[i] = ve.getOutputTypeInfo(); - inputDataTypePhysicalVariations[i++] = ve.getOutputDataTypePhysicalVariation(); - } - - final int outputColumnNum = ocm.allocateOutputColumn(returnType); - VectorElt vectorElt = new VectorElt(inputColumns, outputColumnNum); - - vectorElt.setChildExpressions(vectorChildren); - - vectorElt.setInputTypeInfos(inputTypeInfos); - vectorElt.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations); - - vectorElt.setOutputTypeInfo(returnType); - vectorElt.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); - - freeNonColumns(vectorChildren); - return vectorElt; - } - - public enum InConstantType { - INT_FAMILY, - TIMESTAMP, - DATE, - FLOAT_FAMILY, - STRING_FAMILY, - DECIMAL - } - - public static InConstantType 
getInConstantTypeFromPrimitiveCategory(PrimitiveCategory primitiveCategory) {
-
- switch (primitiveCategory) {
- case BOOLEAN:
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- return InConstantType.INT_FAMILY;
-
- case DATE:
- return InConstantType.DATE;
-
- case TIMESTAMP:
- return InConstantType.TIMESTAMP;
-
- case FLOAT:
- case DOUBLE:
- return InConstantType.FLOAT_FAMILY;
-
- case STRING:
- case CHAR:
- case VARCHAR:
- case BINARY:
- return InConstantType.STRING_FAMILY;
-
- case DECIMAL:
- return InConstantType.DECIMAL;
-
-
- case INTERVAL_YEAR_MONTH:
- case INTERVAL_DAY_TIME:
- // UNDONE: Fall through for these... they don't appear to be supported yet.
- default:
- throw new RuntimeException("Unexpected primitive type category " + primitiveCategory);
- }
- }
-
- private VectorExpression getStructInExpression(List childExpr, ExprNodeDesc colExpr,
- TypeInfo colTypeInfo, List inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
- throws HiveException {
-
- VectorExpression expr = null;
-
- StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
-
- ArrayList fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
- final int fieldCount = fieldTypeInfos.size();
- ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
- InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
- for (int f = 0; f < fieldCount; f++) {
- TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
- // Only primitive fields are supported for now.
- if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
- return null;
- }
-
- // We are going to serialize using the 4 basic types.
- ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
- fieldVectorColumnTypes[f] = fieldVectorColumnType;
-
- // We currently evaluate the IN (..) constants in special ways.
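The struct IN path that follows flattens each constant struct into one byte key (Hive uses BinarySortableSerializeWrite for this), so whole-struct equality reduces to byte-array equality. The essential idea can be sketched without Hive, assuming just a long field and a string field:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class StructKeySketch {
  // Serialize (long, string) fields in order into one byte key;
  // equal structs yield byte-identical keys.
  static byte[] serialize(long a, String b) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeLong(a);
    byte[] s = b.getBytes(StandardCharsets.UTF_8);
    out.writeInt(s.length);  // length prefix keeps field boundaries unambiguous
    out.write(s);
    return bytes.toByteArray();
  }

  public static void main(String[] args) throws IOException {
    System.out.println(Arrays.equals(serialize(1L, "x"), serialize(1L, "x")));  // true
    System.out.println(Arrays.equals(serialize(1L, "x"), serialize(2L, "x")));  // false
  }
}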
- PrimitiveCategory fieldPrimitiveCategory = - ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory(); - InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory); - fieldInConstantTypes[f] = inConstantType; - } - - Output buffer = new Output(); - BinarySortableSerializeWrite binarySortableSerializeWrite = - new BinarySortableSerializeWrite(fieldCount); - - final int inChildrenCount = inChildren.size(); - byte[][] serializedInChildren = new byte[inChildrenCount][]; - try { - for (int i = 0; i < inChildrenCount; i++) { - final ExprNodeDesc node = inChildren.get(i); - final Object[] constants; - - if (node instanceof ExprNodeConstantDesc) { - ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node; - ConstantObjectInspector output = constNode.getWritableObjectInspector(); - constants = ((List) output.getWritableConstantValue()).toArray(); - } else { - ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node; - ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory - .get(exprNode); - ObjectInspector output = evaluator.initialize(exprNode - .getWritableObjectInspector()); - constants = (Object[]) evaluator.evaluate(null); - } - - binarySortableSerializeWrite.set(buffer); - for (int f = 0; f < fieldCount; f++) { - Object constant = constants[f]; - if (constant == null) { - binarySortableSerializeWrite.writeNull(); - } else { - InConstantType inConstantType = fieldInConstantTypes[f]; - switch (inConstantType) { - case STRING_FAMILY: - { - byte[] bytes; - if (constant instanceof Text) { - Text text = (Text) constant; - bytes = text.getBytes(); - binarySortableSerializeWrite.writeString(bytes, 0, text.getLength()); - } else { - throw new HiveException("Unexpected constant String type " + - constant.getClass().getSimpleName()); - } - } - break; - case INT_FAMILY: - { - long value; - if (constant instanceof IntWritable) { - value = ((IntWritable) constant).get(); - } else if (constant instanceof LongWritable) { - value = ((LongWritable) constant).get(); - } else { - throw new HiveException("Unexpected constant Long type " + - constant.getClass().getSimpleName()); - } - binarySortableSerializeWrite.writeLong(value); - } - break; - - case FLOAT_FAMILY: - { - double value; - if (constant instanceof DoubleWritable) { - value = ((DoubleWritable) constant).get(); - } else { - throw new HiveException("Unexpected constant Double type " + - constant.getClass().getSimpleName()); - } - binarySortableSerializeWrite.writeDouble(value); - } - break; - - // UNDONE... - case DATE: - case TIMESTAMP: - case DECIMAL: - default: - throw new RuntimeException("Unexpected IN constant type " + inConstantType.name()); - } - } - } - serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength()); - } - } catch (Exception e) { - throw new HiveException(e); - } - - // Create a single child representing the scratch column where we will - // generate the serialized keys of the batch. - int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo); - - Class cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? 
FilterStructColumnInList.class : StructColumnInList.class); - - expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - - ((IStringInExpr) expr).setInListValues(serializedInChildren); - - ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol); - ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), - fieldVectorColumnTypes); - - return expr; - } - - /** - * Create a filter or boolean-valued expression for column IN ( ) - */ - private VectorExpression getInExpression(List childExpr, - VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - ExprNodeDesc colExpr = childExpr.get(0); - List inChildren = childExpr.subList(1, childExpr.size()); - - String colType = colExpr.getTypeString(); - colType = VectorizationContext.mapTypeNameSynonyms(colType); - TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType); - Category category = colTypeInfo.getCategory(); - if (category == Category.STRUCT) { - return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType); - } else if (category != Category.PRIMITIVE) { - return null; - } - - // prepare arguments for createVectorExpression - List childrenForInList = evaluateCastOnConstants(inChildren); - - /* This method assumes that the IN list has no NULL entries. That is enforced elsewhere, - * in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined. - * If in the future, NULL values are allowed in the IN list, be sure to handle 3-valued - * logic correctly. E.g. NOT (col IN (null)) should be considered UNKNOWN, so that would - * become FALSE in the WHERE clause, and cause the row in question to be filtered out. - * See the discussion in Jira HIVE-5583. - */ - - VectorExpression expr = null; - - // Validate the IN items are only constants. - for (ExprNodeDesc inListChild : childrenForInList) { - if (!(inListChild instanceof ExprNodeConstantDesc)) { - throw new HiveException("Vectorizing IN expression only supported for constant values"); - } - } - - // determine class - Class cl = null; - // TODO: the below assumes that all the arguments to IN are of the same type; - // non-vectorized validates that explicitly during UDF init. - if (isIntFamily(colType)) { - cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class); - long[] inVals = new long[childrenForInList.size()]; - for (int i = 0; i != inVals.length; i++) { - inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i)); - } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); - ((ILongInExpr) expr).setInListValues(inVals); - } else if (isTimestampFamily(colType)) { - cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterTimestampColumnInList.class : TimestampColumnInList.class); - Timestamp[] inVals = new Timestamp[childrenForInList.size()]; - for (int i = 0; i != inVals.length; i++) { - inVals[i] = getTimestampScalar(childrenForInList.get(i)); - } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); - ((ITimestampInExpr) expr).setInListValues(inVals); - } else if (isStringFamily(colType)) { - cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? 
FilterStringColumnInList.class : StringColumnInList.class);
- byte[][] inVals = new byte[childrenForInList.size()][];
- for (int i = 0; i != inVals.length; i++) {
- inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i));
- }
- expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
- ((IStringInExpr) expr).setInListValues(inVals);
- } else if (isFloatFamily(colType)) {
- cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDoubleColumnInList.class : DoubleColumnInList.class);
- double[] inValsD = new double[childrenForInList.size()];
- for (int i = 0; i != inValsD.length; i++) {
- inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i));
- }
- expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
- ((IDoubleInExpr) expr).setInListValues(inValsD);
- } else if (isDecimalFamily(colType)) {
- cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDecimalColumnInList.class : DecimalColumnInList.class);
- HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()];
- for (int i = 0; i != inValsD.length; i++) {
- inValsD[i] = (HiveDecimal) getVectorTypeScalarValue(
- (ExprNodeConstantDesc) childrenForInList.get(i));
- }
- expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
- ((IDecimalInExpr) expr).setInListValues(inValsD);
- } else if (isDateFamily(colType)) {
- cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
- long[] inVals = new long[childrenForInList.size()];
- for (int i = 0; i != inVals.length; i++) {
- inVals[i] = (Long) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
- }
- expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
- ((ILongInExpr) expr).setInListValues(inVals);
- }
-
- // Return the desired VectorExpression if found. Otherwise, return null to cause
- // execution to fall back to row mode.
- return expr;
- }
-
- private byte[] getStringScalarAsByteArray(ExprNodeConstantDesc exprNodeConstantDesc)
- throws HiveException {
- Object o = getScalarValue(exprNodeConstantDesc);
- if (o instanceof byte[]) {
- return (byte[]) o;
- } else if (o instanceof HiveChar) {
- HiveChar hiveChar = (HiveChar) o;
- try {
- return hiveChar.getStrippedValue().getBytes("UTF-8");
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- } else if (o instanceof HiveVarchar) {
- HiveVarchar hiveVarchar = (HiveVarchar) o;
- try {
- return hiveVarchar.getValue().getBytes("UTF-8");
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- } else {
- throw new HiveException("Expected constant argument of string family but found " +
- o.getClass().getSimpleName());
- }
- }
-
- private PrimitiveCategory getAnyIntegerPrimitiveCategoryFromUdfClass(Class udfClass) {
- if (udfClass.equals(UDFToByte.class)) {
- return PrimitiveCategory.BYTE;
- } else if (udfClass.equals(UDFToShort.class)) {
- return PrimitiveCategory.SHORT;
- } else if (udfClass.equals(UDFToInteger.class)) {
- return PrimitiveCategory.INT;
- } else if (udfClass.equals(UDFToLong.class)) {
- return PrimitiveCategory.LONG;
- } else {
- throw new RuntimeException("Unexpected integer-family UDF class " + udfClass.getName());
- }
- }
-
- /**
- * Invoke special handling for expressions that can't be vectorized by regular
- * descriptor-based lookup.
- */ - private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf, - List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - Class cl = udf.getUdfClass(); - VectorExpression ve = null; - if (isCastToIntFamily(cl)) { - PrimitiveCategory integerPrimitiveCategory = - getAnyIntegerPrimitiveCategoryFromUdfClass(cl); - ve = getCastToLongExpression(childExpr, integerPrimitiveCategory); - } else if (cl.equals(UDFToBoolean.class)) { - ve = getCastToBoolean(childExpr); - } else if (isCastToFloatFamily(cl)) { - ve = getCastToDoubleExpression(cl, childExpr, returnType); - } - if (ve == null && childExpr instanceof ExprNodeGenericFuncDesc) { - ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode); - } - return ve; - } - - private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException { - - if (null == scalar) { - return null; - } - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - String typename = type.getTypeName(); - HiveDecimal rawDecimal; - PrimitiveCategory primitiveCategory = ptinfo.getPrimitiveCategory(); - switch (primitiveCategory) { - case FLOAT: - rawDecimal = HiveDecimal.create(String.valueOf(scalar)); - break; - case DOUBLE: - rawDecimal = HiveDecimal.create(String.valueOf(scalar)); - break; - case BYTE: - rawDecimal = HiveDecimal.create((Byte) scalar); - break; - case SHORT: - rawDecimal = HiveDecimal.create((Short) scalar); - break; - case INT: - rawDecimal = HiveDecimal.create((Integer) scalar); - break; - case LONG: - rawDecimal = HiveDecimal.create((Long) scalar); - break; - case STRING: - rawDecimal = HiveDecimal.create((String) scalar); - break; - case CHAR: - rawDecimal = HiveDecimal.create(((HiveChar) scalar).getStrippedValue()); - break; - case VARCHAR: - rawDecimal = HiveDecimal.create(((HiveVarchar) scalar).getValue()); - break; - case DECIMAL: - rawDecimal = (HiveDecimal) scalar; - break; - default: - throw new HiveException("Unsupported primitive category " + primitiveCategory + " for cast to HiveDecimal"); - } - if (rawDecimal == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Casting constant scalar " + scalar + " to HiveDecimal resulted in null"); - } - return null; - } - return rawDecimal; - } - - private String castConstantToString(Object scalar, TypeInfo type) throws HiveException { - if (null == scalar) { - return null; - } - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - String typename = type.getTypeName(); - switch (ptinfo.getPrimitiveCategory()) { - case FLOAT: - case DOUBLE: - case BYTE: - case SHORT: - case INT: - case LONG: - return ((Number) scalar).toString(); - case DECIMAL: - HiveDecimal decimalVal = (HiveDecimal) scalar; - DecimalTypeInfo decType = (DecimalTypeInfo) type; - return decimalVal.toFormatString(decType.getScale()); - default: - throw new HiveException("Unsupported type "+typename+" for cast to String"); - } - } - - private Double castConstantToDouble(Object scalar, TypeInfo type) throws HiveException { - if (null == scalar) { - return null; - } - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - String typename = type.getTypeName(); - PrimitiveCategory primitiveCategory = ptinfo.getPrimitiveCategory(); - switch (primitiveCategory) { - case FLOAT: - case DOUBLE: - case BYTE: - case SHORT: - case INT: - case LONG: - return ((Number) scalar).doubleValue(); - case STRING: - return Double.valueOf((String) scalar); - case CHAR: - return Double.valueOf(((HiveChar) scalar).getStrippedValue()); - case VARCHAR: - return 
Double.valueOf(((HiveVarchar) scalar).getValue()); - case DECIMAL: - HiveDecimal decimalVal = (HiveDecimal) scalar; - return decimalVal.doubleValue(); - default: - throw new HiveException("Unsupported primitive category " + primitiveCategory + " for cast to DOUBLE"); - } - } - - private Long castConstantToLong(Object scalar, TypeInfo type, - PrimitiveCategory integerPrimitiveCategory) throws HiveException { - if (null == scalar) { - return null; - } - PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type; - String typename = type.getTypeName(); - PrimitiveCategory primitiveCategory = ptinfo.getPrimitiveCategory(); - switch (primitiveCategory) { - case FLOAT: - case DOUBLE: - case BYTE: - case SHORT: - case INT: - case LONG: - return ((Number) scalar).longValue(); - case STRING: - case CHAR: - case VARCHAR: - { - final long longValue; - if (primitiveCategory == PrimitiveCategory.STRING) { - longValue = Long.valueOf((String) scalar); - } else if (primitiveCategory == PrimitiveCategory.CHAR) { - longValue = Long.valueOf(((HiveChar) scalar).getStrippedValue()); - } else { - longValue = Long.valueOf(((HiveVarchar) scalar).getValue()); - } - switch (integerPrimitiveCategory) { - case BYTE: - if (longValue != ((byte) longValue)) { - // Accurate byte value cannot be obtained. - return null; - } - break; - case SHORT: - if (longValue != ((short) longValue)) { - // Accurate short value cannot be obtained. - return null; - } - break; - case INT: - if (longValue != ((int) longValue)) { - // Accurate int value cannot be obtained. - return null; - } - break; - case LONG: - // No range check needed. - break; - default: - throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory); - } - return longValue; - } - case DECIMAL: - HiveDecimal decimalVal = (HiveDecimal) scalar; - switch (integerPrimitiveCategory) { - case BYTE: - if (!decimalVal.isByte()) { - // Accurate byte value cannot be obtained. - return null; - } - break; - case SHORT: - if (!decimalVal.isShort()) { - // Accurate short value cannot be obtained. - return null; - } - break; - case INT: - if (!decimalVal.isInt()) { - // Accurate int value cannot be obtained. - return null; - } - break; - case LONG: - if (!decimalVal.isLong()) { - // Accurate long value cannot be obtained. - return null; - } - break; - default: - throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory); - } - // We only store longs in our LongColumnVector. - return decimalVal.longValue(); - default: - throw new HiveException("Unsupported primitive category " + primitiveCategory + " for cast to LONG"); - } - } - - /* - * This method must return the decimal TypeInfo for what getCastToDecimal will produce. - */ - private DecimalTypeInfo decimalTypeFromCastToDecimal(ExprNodeDesc exprNodeDesc, - DecimalTypeInfo returnDecimalType) throws HiveException { - - if (exprNodeDesc instanceof ExprNodeConstantDesc) { - // Return a constant vector expression - Object constantValue = ((ExprNodeConstantDesc) exprNodeDesc).getValue(); - HiveDecimal decimalValue = castConstantToDecimal(constantValue, exprNodeDesc.getTypeInfo()); - if (decimalValue == null) { - // Return something. 
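The castConstantToLong branches above guard every narrowing with a round-trip test: a long is representable as a byte (or short, or int) exactly when casting down and back changes nothing. The byte case in isolation:

public class NarrowingCheckSketch {
  static Long narrowToByte(long value) {
    if (value != (byte) value) {
      return null;  // accurate byte value cannot be obtained
    }
    return value;
  }

  public static void main(String[] args) {
    System.out.println(narrowToByte(127));  // 127
    System.out.println(narrowToByte(128));  // null, since (byte) 128 == -128
  }
}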
- return returnDecimalType; - } - return new DecimalTypeInfo(decimalValue.precision(), decimalValue.scale()); - } - String inputType = exprNodeDesc.getTypeString(); - if (isIntFamily(inputType) || - isFloatFamily(inputType) || - decimalTypePattern.matcher(inputType).matches() || - isStringFamily(inputType) || - inputType.equals("timestamp")) { - return returnDecimalType; - } - return null; - } - - private VectorExpression getCastToDecimal(List childExpr, TypeInfo returnType) - throws HiveException { - ExprNodeDesc child = childExpr.get(0); - String inputType = childExpr.get(0).getTypeString(); - if (child instanceof ExprNodeConstantDesc) { - // Return a constant vector expression - Object constantValue = ((ExprNodeConstantDesc) child).getValue(); - HiveDecimal decimalValue = castConstantToDecimal(constantValue, child.getTypeInfo()); - return getConstantVectorExpression(decimalValue, returnType, VectorExpressionDescriptor.Mode.PROJECTION); - } - if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("float")) { - return createVectorExpression(CastFloatToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("double")) { - return createVectorExpression(CastDoubleToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (decimalTypePattern.matcher(inputType).matches()) { - if (child instanceof ExprNodeColumnDesc) { - int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); - DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); - if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - - // Do Decimal64 conversion instead. - return createDecimal64ToDecimalConversion(colIndex, returnType); - } else { - return createVectorExpression(CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, - returnType); - } - } else { - return createVectorExpression(CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, - returnType); - } - } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("timestamp")) { - return createVectorExpression(CastTimestampToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } - return null; - } - - private VectorExpression getCastToString(List childExpr, TypeInfo returnType) - throws HiveException { - ExprNodeDesc child = childExpr.get(0); - String inputType = childExpr.get(0).getTypeString(); - if (child instanceof ExprNodeConstantDesc) { - // Return a constant vector expression - Object constantValue = ((ExprNodeConstantDesc) child).getValue(); - String strValue = castConstantToString(constantValue, child.getTypeInfo()); - return getConstantVectorExpression(strValue, returnType, VectorExpressionDescriptor.Mode.PROJECTION); - } - if (inputType.equals("boolean")) { - // Boolean must come before the integer family. It's a special case. 
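The constant branch of getCastToString above folds a decimal constant to text via castConstantToString, which formats at the type's declared scale. A rough java.math analogue of that formatting step (toFormatString's exact rounding behavior is not reproduced here, so treat this as an approximation):

import java.math.BigDecimal;
import java.math.RoundingMode;

public class DecimalToStringSketch {
  // Render a decimal with exactly `scale` fractional digits,
  // padding or rounding as needed.
  static String format(BigDecimal value, int scale) {
    return value.setScale(scale, RoundingMode.HALF_UP).toPlainString();
  }

  public static void main(String[] args) {
    System.out.println(format(new BigDecimal("3.1"), 3));     // 3.100
    System.out.println(format(new BigDecimal("3.1459"), 3));  // 3.146
  }
}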
- return createVectorExpression(CastBooleanToStringViaLongToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("float")) { - return createVectorExpression(CastFloatToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("double")) { - return createVectorExpression(CastDoubleToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isDecimalFamily(inputType)) { - return createVectorExpression(CastDecimalToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isTimestampFamily(inputType)) { - return createVectorExpression(CastTimestampToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringGroupToString.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } - return null; - } - - private VectorExpression getCastToChar(List childExpr, TypeInfo returnType) - throws HiveException { - ExprNodeDesc child = childExpr.get(0); - String inputType = childExpr.get(0).getTypeString(); - if (child instanceof ExprNodeConstantDesc) { - // Don't do constant folding here. Wait until the optimizer is changed to do it. - // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. - return null; - } - if (inputType.equals("boolean")) { - // Boolean must come before the integer family. It's a special case. - return createVectorExpression(CastBooleanToCharViaLongToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("float")) { - return createVectorExpression(CastFloatToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("double")) { - return createVectorExpression(CastDoubleToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isDecimalFamily(inputType)) { - return createVectorExpression(CastDecimalToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isTimestampFamily(inputType)) { - return createVectorExpression(CastTimestampToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringGroupToChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } - return null; - } - - private VectorExpression getCastToVarChar(List childExpr, TypeInfo returnType) - throws HiveException { - ExprNodeDesc child = childExpr.get(0); - String inputType = childExpr.get(0).getTypeString(); - if (child instanceof ExprNodeConstantDesc) { - // Don't do constant folding here. Wait until the optimizer is changed to do it. 
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. - return null; - } - if (inputType.equals("boolean")) { - // Boolean must come before the integer family. It's a special case. - return createVectorExpression(CastBooleanToVarCharViaLongToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("float")) { - return createVectorExpression(CastFloatToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (inputType.equals("double")) { - return createVectorExpression(CastDoubleToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isDecimalFamily(inputType)) { - return createVectorExpression(CastDecimalToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isDateFamily(inputType)) { - return createVectorExpression(CastDateToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isTimestampFamily(inputType)) { - return createVectorExpression(CastTimestampToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringGroupToVarChar.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } - return null; - } - - private VectorExpression getCastToDoubleExpression(Class udf, List childExpr, - TypeInfo returnType) throws HiveException { - ExprNodeDesc child = childExpr.get(0); - String inputType = childExpr.get(0).getTypeString(); - if (child instanceof ExprNodeConstantDesc) { - // Return a constant vector expression - Object constantValue = ((ExprNodeConstantDesc) child).getValue(); - Double doubleValue = castConstantToDouble(constantValue, child.getTypeInfo()); - return getConstantVectorExpression(doubleValue, returnType, VectorExpressionDescriptor.Mode.PROJECTION); - } - if (isIntFamily(inputType)) { - if (udf.equals(UDFToFloat.class)) { - // In order to convert from integer to float correctly, we need to apply the float cast not the double cast (HIVE-13338). - return createVectorExpression(CastLongToFloatViaLongToDouble.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } else { - return createVectorExpression(CastLongToDouble.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } - } else if (inputType.equals("timestamp")) { - return createVectorExpression(CastTimestampToDouble.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, - returnType); - } else if (isFloatFamily(inputType)) { - - // float types require no conversion, so use a no-op - return getIdentityExpression(childExpr); - } - return null; - } - - private VectorExpression getCastToBoolean(List childExpr) - throws HiveException { - ExprNodeDesc child = childExpr.get(0); - TypeInfo inputTypeInfo = child.getTypeInfo(); - String inputType = inputTypeInfo.toString(); - if (child instanceof ExprNodeConstantDesc) { - if (null == ((ExprNodeConstantDesc)child).getValue()) { - return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, VectorExpressionDescriptor.Mode.PROJECTION); - } - // Don't do constant folding here. Wait until the optimizer is changed to do it. - // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. 
- return null; - } - // Long and double are handled using descriptors, string needs to be specially handled. - if (isStringFamily(inputType)) { - - VectorExpression lenExpr = createVectorExpression(CastStringToBoolean.class, childExpr, - VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo); - - return lenExpr; - } - return null; - } - - private VectorExpression getCastToLongExpression(List childExpr, PrimitiveCategory integerPrimitiveCategory) - throws HiveException { - ExprNodeDesc child = childExpr.get(0); - String inputType = childExpr.get(0).getTypeString(); - if (child instanceof ExprNodeConstantDesc) { - // Return a constant vector expression - Object constantValue = ((ExprNodeConstantDesc) child).getValue(); - Long longValue = castConstantToLong(constantValue, child.getTypeInfo(), integerPrimitiveCategory); - return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, VectorExpressionDescriptor.Mode.PROJECTION); - } - // Float family, timestamp are handled via descriptor based lookup, int family needs - // special handling. - if (isIntFamily(inputType)) { - // integer and boolean types require no conversion, so use a no-op - return getIdentityExpression(childExpr); - } - return null; - } - - /* Get a [NOT] BETWEEN filter expression. This is treated as a special case - * because the NOT is actually specified in the expression tree as the first argument, - * and we don't want any runtime cost for that. So creating the VectorExpression - * needs to be done differently than the standard way where all arguments are - * passed to the VectorExpression constructor. - */ - private VectorExpression getBetweenFilterExpression(List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) - throws HiveException { - - if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - - // Projection mode is not yet supported for [NOT] BETWEEN. Return null so Vectorizer - // knows to revert to row-at-a-time execution. - return null; - } - - boolean hasDynamicValues = false; - - // We don't currently support the BETWEEN ends being columns. They must be scalars. - if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) && - (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) { - hasDynamicValues = true; - } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || - !(childExpr.get(3) instanceof ExprNodeConstantDesc)) { - return null; - } - - boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue(); - ExprNodeDesc colExpr = childExpr.get(1); - - // The children after not, might need a cast. Get common types for the two comparisons. 
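// Illustrative layout (assumed column/literal names) of the child list this
// method receives for "dbl_col NOT BETWEEN 10 AND 20.5":
//
//   childExpr.get(0) = ExprNodeConstantDesc(true)    // the NOT flag
//   childExpr.get(1) = ExprNodeColumnDesc(dbl_col)
//   childExpr.get(2) = ExprNodeConstantDesc(10)
//   childExpr.get(3) = ExprNodeConstantDesc(20.5)
//
// Children 1..3 are cast below to the common comparison type (double here),
// after which FilterDoubleColumnNotBetween would be selected.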
- // Casting for 'between' is handled here as a special case, because the first child is for NOT and doesn't need - // cast - TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(childExpr.get(1).getTypeInfo(), - childExpr.get(2).getTypeInfo()); - if (commonType == null) { - - // Can't vectorize - return null; - } - commonType = FunctionRegistry.getCommonClassForComparison(commonType, childExpr.get(3).getTypeInfo()); - if (commonType == null) { - - // Can't vectorize - return null; - } - - List castChildren = new ArrayList(); - - for (ExprNodeDesc desc: childExpr.subList(1, 4)) { - if (commonType.equals(desc.getTypeInfo())) { - castChildren.add(desc); - } else { - GenericUDF castUdf = getGenericUDFForCast(commonType); - ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf, - Arrays.asList(new ExprNodeDesc[] { desc })); - castChildren.add(engfd); - } - } - String colType = commonType.getTypeName(); - - // prepare arguments for createVectorExpression - List childrenAfterNot = evaluateCastOnConstants(castChildren); - - // determine class - Class cl = null; - if (isIntFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterLongColumnBetweenDynamicValue.class : - FilterLongColumnBetween.class); - } else if (isIntFamily(colType) && notKeywordPresent) { - cl = FilterLongColumnNotBetween.class; - } else if (isFloatFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDoubleColumnBetweenDynamicValue.class : - FilterDoubleColumnBetween.class); - } else if (isFloatFamily(colType) && notKeywordPresent) { - cl = FilterDoubleColumnNotBetween.class; - } else if (colType.equals("string") && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterStringColumnBetweenDynamicValue.class : - FilterStringColumnBetween.class); - } else if (colType.equals("string") && notKeywordPresent) { - cl = FilterStringColumnNotBetween.class; - } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterVarCharColumnBetweenDynamicValue.class : - FilterVarCharColumnBetween.class); - } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) { - cl = FilterVarCharColumnNotBetween.class; - } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterCharColumnBetweenDynamicValue.class : - FilterCharColumnBetween.class); - } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) { - cl = FilterCharColumnNotBetween.class; - } else if (colType.equals("timestamp") && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterTimestampColumnBetweenDynamicValue.class : - FilterTimestampColumnBetween.class); - } else if (colType.equals("timestamp") && notKeywordPresent) { - cl = FilterTimestampColumnNotBetween.class; - } else if (isDecimalFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDecimalColumnBetweenDynamicValue.class : - FilterDecimalColumnBetween.class); - } else if (isDecimalFamily(colType) && notKeywordPresent) { - cl = FilterDecimalColumnNotBetween.class; - } else if (isDateFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? 
- FilterDateColumnBetweenDynamicValue.class : - FilterLongColumnBetween.class); - } else if (isDateFamily(colType) && notKeywordPresent) { - cl = FilterLongColumnNotBetween.class; - } - return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); - } - - private boolean isCondExpr(ExprNodeDesc exprNodeDesc) { - if (exprNodeDesc instanceof ExprNodeConstantDesc || - exprNodeDesc instanceof ExprNodeColumnDesc) { - return false; - } - return true; // Requires conditional evaluation for good performance. - } - - private boolean isNullConst(ExprNodeDesc exprNodeDesc) { - //null constant could be typed so we need to check the value - if (exprNodeDesc instanceof ExprNodeConstantDesc && - ((ExprNodeConstantDesc) exprNodeDesc).getValue() == null) { - return true; - } - return false; - } - - private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List childExpr, - VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - - if (mode != VectorExpressionDescriptor.Mode.PROJECTION) { - return null; - } - - // Add HiveConf variable with 3 modes: - // 1) adaptor: Always use VectorUDFAdaptor for IF statements. - // - // 2) good: Vectorize but don't optimize conditional expressions - // - // 3) better: Vectorize and Optimize conditional expressions. - // - - if (hiveVectorIfStmtMode == HiveVectorIfStmtMode.ADAPTOR) { - return null; - } - - // Align the THEN/ELSE types. - childExpr = - getChildExpressionsWithImplicitCast( - genericUDFIf, - childExpr, - returnType); - - final ExprNodeDesc ifDesc = childExpr.get(0); - final ExprNodeDesc thenDesc = childExpr.get(1); - final ExprNodeDesc elseDesc = childExpr.get(2); - - final boolean isThenNullConst = isNullConst(thenDesc); - final boolean isElseNullConst = isNullConst(elseDesc); - if (isThenNullConst && isElseNullConst) { - - // THEN NULL ELSE NULL: An unusual "case", but possible. 
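// Illustrative query shape (assumed) for this branch: both branches are typed
// NULL constants, so the output column is all-null whatever the condition is:
//
//   SELECT IF(flag_col, NULL, NULL) FROM t;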
- final int outputColumnNum = ocm.allocateOutputColumn(returnType); - - final VectorExpression resultExpr = - new IfExprNullNull( - outputColumnNum); - - resultExpr.setOutputTypeInfo(returnType); - resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); - - return resultExpr; - } - - final boolean isThenCondExpr = isCondExpr(thenDesc); - final boolean isElseCondExpr = isCondExpr(elseDesc); - - final boolean isOnlyGood = (hiveVectorIfStmtMode == HiveVectorIfStmtMode.GOOD); - - if (isThenNullConst) { - final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); - final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); - - final int outputColumnNum = ocm.allocateOutputColumn(returnType); - - final VectorExpression resultExpr; - if (!isElseCondExpr || isOnlyGood) { - resultExpr = - new IfExprNullColumn( - whenExpr.getOutputColumnNum(), - elseExpr.getOutputColumnNum(), - outputColumnNum); - } else { - resultExpr = - new IfExprNullCondExpr( - whenExpr.getOutputColumnNum(), - elseExpr.getOutputColumnNum(), - outputColumnNum); - } - - resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, elseExpr}); - - resultExpr.setInputTypeInfos( - whenExpr.getOutputTypeInfo(), - TypeInfoFactory.voidTypeInfo, - elseExpr.getOutputTypeInfo()); - resultExpr.setInputDataTypePhysicalVariations( - whenExpr.getOutputDataTypePhysicalVariation(), - DataTypePhysicalVariation.NONE, - elseExpr.getOutputDataTypePhysicalVariation()); - - resultExpr.setOutputTypeInfo(returnType); - resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); - - return resultExpr; - } - - if (isElseNullConst) { - final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); - final VectorExpression thenExpr = getVectorExpression(thenDesc, mode); - - final int outputColumnNum = ocm.allocateOutputColumn(returnType); - - final VectorExpression resultExpr; - if (!isThenCondExpr || isOnlyGood) { - resultExpr = - new IfExprColumnNull( - whenExpr.getOutputColumnNum(), - thenExpr.getOutputColumnNum(), - outputColumnNum); - } else { - resultExpr = - new IfExprCondExprNull( - whenExpr.getOutputColumnNum(), - thenExpr.getOutputColumnNum(), - outputColumnNum); - } - - resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr}); - - resultExpr.setInputTypeInfos( - whenExpr.getOutputTypeInfo(), - thenExpr.getOutputTypeInfo(), - TypeInfoFactory.voidTypeInfo); - resultExpr.setInputDataTypePhysicalVariations( - whenExpr.getOutputDataTypePhysicalVariation(), - thenExpr.getOutputDataTypePhysicalVariation(), - DataTypePhysicalVariation.NONE); - - resultExpr.setOutputTypeInfo(returnType); - resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); - - return resultExpr; - } - - if ((isThenCondExpr || isElseCondExpr) && !isOnlyGood) { - final VectorExpression whenExpr = getVectorExpression(ifDesc, mode); - final VectorExpression thenExpr = getVectorExpression(thenDesc, mode); - final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); - - // Only proceed if the THEN/ELSE types were aligned. 
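// Rough intuition for the CondExpr variants chosen below (illustrative only):
// they evaluate each branch only over the rows that actually take it,
// conceptually:
//
//   cond = evaluate(ifDesc)                    // per-row booleans
//   evaluate thenExpr over { i : cond[i] }     // THEN rows only
//   evaluate elseExpr over { i : !cond[i] }    // ELSE rows only
//
// Plain columns and constants are cheap to read, so they keep the simpler
// Column variants; this is the "better" mode optimization described above.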
- if (thenExpr.getOutputColumnVectorType() == elseExpr.getOutputColumnVectorType()) { - - final int outputColumnNum = ocm.allocateOutputColumn(returnType); - - final VectorExpression resultExpr; - if (isThenCondExpr && isElseCondExpr) { - resultExpr = - new IfExprCondExprCondExpr( - whenExpr.getOutputColumnNum(), - thenExpr.getOutputColumnNum(), - elseExpr.getOutputColumnNum(), - outputColumnNum); - } else if (isThenCondExpr) { - resultExpr = - new IfExprCondExprColumn( - whenExpr.getOutputColumnNum(), - thenExpr.getOutputColumnNum(), - elseExpr.getOutputColumnNum(), - outputColumnNum); - } else { - resultExpr = - new IfExprColumnCondExpr( - whenExpr.getOutputColumnNum(), - thenExpr.getOutputColumnNum(), - elseExpr.getOutputColumnNum(), - outputColumnNum); - } - - resultExpr.setChildExpressions(new VectorExpression[] {whenExpr, thenExpr, elseExpr}); - - resultExpr.setInputTypeInfos( - whenExpr.getOutputTypeInfo(), - thenExpr.getOutputTypeInfo(), - elseExpr.getOutputTypeInfo()); - resultExpr.setInputDataTypePhysicalVariations( - whenExpr.getOutputDataTypePhysicalVariation(), - thenExpr.getOutputDataTypePhysicalVariation(), - elseExpr.getOutputDataTypePhysicalVariation()); - - resultExpr.setOutputTypeInfo(returnType); - resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); - - return resultExpr; - } - } - - Class udfClass = genericUDFIf.getClass(); - return getVectorExpressionForUdf( - genericUDFIf, udfClass, childExpr, mode, returnType); - } - - private VectorExpression getWhenExpression(List childExpr, - VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { - - if (mode != VectorExpressionDescriptor.Mode.PROJECTION) { - return null; - } - final int size = childExpr.size(); - - final ExprNodeDesc whenDesc = childExpr.get(0); - final ExprNodeDesc thenDesc = childExpr.get(1); - final ExprNodeDesc elseDesc; - - if (size == 2) { - elseDesc = new ExprNodeConstantDesc(returnType, null); - } else if (size == 3) { - elseDesc = childExpr.get(2); - } else { - final GenericUDFWhen udfWhen = new GenericUDFWhen(); - elseDesc = new ExprNodeGenericFuncDesc(returnType, udfWhen, udfWhen.getUdfName(), - childExpr.subList(2, childExpr.size())); - } - - // Transform CASE WHEN with just a THEN/ELSE into an IF statement. - final GenericUDFIf genericUDFIf = new GenericUDFIf(); - final List ifChildExpr = - Arrays.asList(whenDesc, thenDesc, elseDesc); - return getIfExpression(genericUDFIf, ifChildExpr, mode, returnType); - } - - /* - * Return vector expression for a custom (i.e. not built-in) UDF. - */ - private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) - throws HiveException { - - boolean isFilter = false; // Assume. - if (mode == VectorExpressionDescriptor.Mode.FILTER) { - - // Is output type a BOOLEAN? 
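// Illustrative examples (my_udf is a hypothetical custom UDF): in FILTER mode
// the UDF's boolean output drives row selection and is later wrapped in
// SelectColumnIsTrue, so only boolean-returning UDFs qualify here:
//
//   WHERE my_udf(col1, col2)      -- FILTER mode, must return boolean
//   WHERE my_udf(col1) + 1 > 0    -- the UDF itself runs in PROJECTION mode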
- TypeInfo resultTypeInfo = expr.getTypeInfo(); - if (resultTypeInfo.getCategory() == Category.PRIMITIVE && - ((PrimitiveTypeInfo) resultTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) { - isFilter = true; - } else { - return null; - } - } - - //GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF(); - List childExprList = expr.getChildren(); - final int childrenCount = childExprList.size(); - - // argument descriptors - VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childrenCount]; - for (int i = 0; i < argDescs.length; i++) { - argDescs[i] = new VectorUDFArgDesc(); - } - - // positions of variable arguments (columns or non-constant expressions) - List variableArgPositions = new ArrayList(); - - // Column numbers of batch corresponding to expression result arguments - List exprResultColumnNums = new ArrayList(); - - // Prepare children - List vectorExprs = new ArrayList(); - - TypeInfo[] inputTypeInfos = new TypeInfo[childrenCount]; - DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[childrenCount]; - - for (int i = 0; i < childrenCount; i++) { - ExprNodeDesc child = childExprList.get(i); - inputTypeInfos[i] = child.getTypeInfo(); - inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE; - - if (child instanceof ExprNodeGenericFuncDesc) { - VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION); - vectorExprs.add(e); - variableArgPositions.add(i); - exprResultColumnNums.add(e.getOutputColumnNum()); - argDescs[i].setVariable(e.getOutputColumnNum()); - } else if (child instanceof ExprNodeColumnDesc) { - variableArgPositions.add(i); - argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn())); - } else if (child instanceof ExprNodeConstantDesc) { - // this is a constant (or null) - if (child.getTypeInfo().getCategory() != Category.PRIMITIVE) { - - // Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant. - throw new HiveException( - "Unable to vectorize custom UDF. Complex type constants not supported: " + child); - } - argDescs[i].setConstant((ExprNodeConstantDesc) child); - } else if (child instanceof ExprNodeDynamicValueDesc) { - VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION); - vectorExprs.add(e); - variableArgPositions.add(i); - exprResultColumnNums.add(e.getOutputColumnNum()); - argDescs[i].setVariable(e.getOutputColumnNum()); - } else if (child instanceof ExprNodeFieldDesc) { - // Get the GenericUDFStructField to process the field of Struct type - VectorExpression e = - getGenericUDFStructField( - (ExprNodeFieldDesc) child, VectorExpressionDescriptor.Mode.PROJECTION, - child.getTypeInfo()); - vectorExprs.add(e); - variableArgPositions.add(i); - exprResultColumnNums.add(e.getOutputColumnNum()); - argDescs[i].setVariable(e.getOutputColumnNum()); - } else { - throw new HiveException("Unable to vectorize custom UDF. 
Encountered unsupported expr desc : " - + child); - } - } - - // Allocate output column and get column number; - TypeInfo resultTypeInfo = expr.getTypeInfo(); - String resultTypeName = resultTypeInfo.getTypeName(); - - final int outputColumnNum = ocm.allocateOutputColumn(expr.getTypeInfo()); - - // Make vectorized operator - VectorUDFAdaptor ve = new VectorUDFAdaptor(expr, outputColumnNum, resultTypeName, argDescs); - ve.setSuppressEvaluateExceptions(adaptorSuppressEvaluateExceptions); - - // Set child expressions - VectorExpression[] childVEs = null; - if (exprResultColumnNums.size() != 0) { - childVEs = new VectorExpression[exprResultColumnNums.size()]; - for (int i = 0; i < childVEs.length; i++) { - childVEs[i] = vectorExprs.get(i); - } - } - ve.setChildExpressions(childVEs); - - ve.setInputTypeInfos(inputTypeInfos); - ve.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations); - - ve.setOutputTypeInfo(resultTypeInfo); - ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); - - // Free output columns if inputs have non-leaf expression trees. - for (Integer i : exprResultColumnNums) { - ocm.freeOutputColumn(i); - } - - if (isFilter) { - SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputColumnNum); - - filterVectorExpr.setChildExpressions(new VectorExpression[] {ve}); - - filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo()); - filterVectorExpr.setInputDataTypePhysicalVariations(ve.getOutputDataTypePhysicalVariation()); - - return filterVectorExpr; - } else { - return ve; - } - } - - public static boolean isStringFamily(String resultType) { - return resultType.equalsIgnoreCase("string") || charVarcharTypePattern.matcher(resultType).matches() || - resultType.equalsIgnoreCase("string_family"); - } - - public static boolean isDatetimeFamily(String resultType) { - return resultType.equalsIgnoreCase("timestamp") || resultType.equalsIgnoreCase("date"); - } - - public static boolean isTimestampFamily(String resultType) { - return resultType.equalsIgnoreCase("timestamp"); - } - - public static boolean isDateFamily(String resultType) { - return resultType.equalsIgnoreCase("date"); - } - - public static boolean isIntervalYearMonthFamily(String resultType) { - return resultType.equalsIgnoreCase("interval_year_month"); - } - - public static boolean isIntervalDayTimeFamily(String resultType) { - return resultType.equalsIgnoreCase("interval_day_time"); - } - - // return true if this is any kind of float - public static boolean isFloatFamily(String resultType) { - return resultType.equalsIgnoreCase("double") - || resultType.equalsIgnoreCase("float"); - } - - // Return true if this data type is handled in the output vector as an integer. 
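// Illustrative results of these family helpers (example type names assumed):
//
//   isIntFamily("smallint")          -> true  // vectorized in LongColumnVector
//   isIntFamily("boolean")           -> true  // booleans are 0/1 longs
//   isFloatFamily("float")           -> true  // vectorized in DoubleColumnVector
//   isDecimalFamily("decimal(10,2)") -> true  // via decimalTypePattern
//   isStringFamily("varchar(20)")    -> true  // via charVarcharTypePattern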
- public static boolean isIntFamily(String resultType) { - return resultType.equalsIgnoreCase("tinyint") - || resultType.equalsIgnoreCase("smallint") - || resultType.equalsIgnoreCase("int") - || resultType.equalsIgnoreCase("bigint") - || resultType.equalsIgnoreCase("boolean") - || resultType.equalsIgnoreCase("long"); - } - - public static boolean isDecimalFamily(String colType) { - return decimalTypePattern.matcher(colType).matches(); - } - - private Object getScalarValue(ExprNodeConstantDesc constDesc) - throws HiveException { - String typeString = constDesc.getTypeString(); - if (typeString.equalsIgnoreCase("String")) { - return ((String) constDesc.getValue()).getBytes(StandardCharsets.UTF_8); - } else if (charTypePattern.matcher(typeString).matches()) { - return ((HiveChar) constDesc.getValue()).getStrippedValue().getBytes(StandardCharsets.UTF_8); - } else if (varcharTypePattern.matcher(typeString).matches()) { - return ((HiveVarchar) constDesc.getValue()).getValue().getBytes(StandardCharsets.UTF_8); - } else if (typeString.equalsIgnoreCase("boolean")) { - if (constDesc.getValue().equals(Boolean.valueOf(true))) { - return 1; - } else { - return 0; - } - } else if (decimalTypePattern.matcher(typeString).matches()) { - return constDesc.getValue(); - } else { - return constDesc.getValue(); - } - } - - private long getIntFamilyScalarAsLong(ExprNodeConstantDesc constDesc) - throws HiveException { - Object o = getScalarValue(constDesc); - if (o instanceof Integer) { - return (Integer) o; - } else if (o instanceof Long) { - return (Long) o; - } - throw new HiveException("Unexpected type when converting to long : "+o.getClass().getSimpleName()); - } - - private double getNumericScalarAsDouble(ExprNodeDesc constDesc) - throws HiveException { - Object o = getScalarValue((ExprNodeConstantDesc) constDesc); - if (o instanceof Double) { - return (Double) o; - } else if (o instanceof Float) { - return (Float) o; - } else if (o instanceof Integer) { - return (Integer) o; - } else if (o instanceof Long) { - return (Long) o; - } - throw new HiveException("Unexpected type when converting to double"); - } - - private Object getVectorTypeScalarValue(ExprNodeConstantDesc constDesc) throws HiveException { - TypeInfo typeInfo = constDesc.getTypeInfo(); - PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); - Object scalarValue = getScalarValue(constDesc); - switch (primitiveCategory) { - case DATE: - return new Long(DateWritableV2.dateToDays((Date) scalarValue)); - case TIMESTAMP: - return ((org.apache.hadoop.hive.common.type.Timestamp) scalarValue).toSqlTimestamp(); - case INTERVAL_YEAR_MONTH: - return ((HiveIntervalYearMonth) scalarValue).getTotalMonths(); - default: - return scalarValue; - } - } - - // Get a timestamp from a string constant or cast - private Timestamp getTimestampScalar(ExprNodeDesc expr) throws HiveException { - if (expr instanceof ExprNodeGenericFuncDesc && - ((ExprNodeGenericFuncDesc) expr).getGenericUDF() instanceof GenericUDFTimestamp) { - return evaluateCastToTimestamp(expr); - } - if (!(expr instanceof ExprNodeConstantDesc)) { - throw new HiveException("Constant timestamp value expected for expression argument. 
" + - "Non-constant argument not supported for vectorization."); - } - ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr; - String constTypeString = constExpr.getTypeString(); - if (isStringFamily(constTypeString) || isDatetimeFamily(constTypeString)) { - - // create expression tree with type cast from string to timestamp - ExprNodeGenericFuncDesc expr2 = new ExprNodeGenericFuncDesc(); - GenericUDFTimestamp f = new GenericUDFTimestamp(); - expr2.setGenericUDF(f); - ArrayList children = new ArrayList(); - children.add(expr); - expr2.setChildren(children); - - // initialize and evaluate - return evaluateCastToTimestamp(expr2); - } - - throw new HiveException("Udf: unhandled constant type for scalar argument. " - + "Expecting string/date/timestamp."); - } - - private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException { - ExprNodeGenericFuncDesc expr2 = (ExprNodeGenericFuncDesc) expr; - ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr2); - ObjectInspector output = evaluator.initialize(null); - Object constant = evaluator.evaluate(null); - Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output); - - if (!(java instanceof org.apache.hadoop.hive.common.type.Timestamp)) { - throw new HiveException("Udf: failed to convert to timestamp"); - } - Timestamp ts = ((org.apache.hadoop.hive.common.type.Timestamp) java).toSqlTimestamp(); - return ts; - } - - private Constructor getConstructor(Class cl) throws HiveException { - try { - Constructor [] ctors = cl.getDeclaredConstructors(); - if (ctors.length == 1) { - return ctors[0]; - } - Constructor defaultCtor = cl.getConstructor(); - for (Constructor ctor : ctors) { - if (!ctor.equals(defaultCtor)) { - return ctor; - } - } - throw new HiveException("Only default constructor found"); - } catch (Exception ex) { - throw new HiveException(ex); - } - } - - static String getScratchName(TypeInfo typeInfo) throws HiveException { - // For now, leave DECIMAL precision/scale in the name so DecimalColumnVector scratch columns - // don't need their precision/scale adjusted... - if (typeInfo.getCategory() == Category.PRIMITIVE && - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.DECIMAL) { - return typeInfo.getTypeName(); - } - - // And, for Complex Types, also leave the children types in place... 
- if (typeInfo.getCategory() != Category.PRIMITIVE) { - return typeInfo.getTypeName(); - } - - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); - return columnVectorType.name().toLowerCase(); - } - - static String getUndecoratedName(String hiveTypeName) throws HiveException { - VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName); - switch (argType) { - case INT_FAMILY: - return "Long"; - case FLOAT_FAMILY: - return "Double"; - case DECIMAL: - return "Decimal"; - case STRING: - return "String"; - case CHAR: - return "Char"; - case VARCHAR: - return "VarChar"; - case BINARY: - return "Binary"; - case DATE: - return "Date"; - case TIMESTAMP: - return "Timestamp"; - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: - return hiveTypeName; - case STRUCT: - return "Struct"; - case LIST: - return "List"; - case MAP: - return "Map"; - default: - throw new HiveException("Unexpected hive type name " + hiveTypeName); - } - } - - public static String mapTypeNameSynonyms(String typeName) { - typeName = typeName.toLowerCase(); - if (typeName.equals("long")) { - return "bigint"; - } else if (typeName.equals("string_family")) { - return "string"; - } else { - return typeName; - } - } - - public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) - throws HiveException { - return getColumnVectorTypeFromTypeInfo(typeInfo, DataTypePhysicalVariation.NONE); - } - - public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo, - DataTypePhysicalVariation dataTypePhysicalVariation) - throws HiveException { - switch (typeInfo.getCategory()) { - case STRUCT: - return Type.STRUCT; - case UNION: - return Type.UNION; - case LIST: - return Type.LIST; - case MAP: - return Type.MAP; - case PRIMITIVE: { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - - switch (primitiveCategory) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case DATE: - case INTERVAL_YEAR_MONTH: - return ColumnVector.Type.LONG; - - case TIMESTAMP: - return ColumnVector.Type.TIMESTAMP; - - case INTERVAL_DAY_TIME: - return ColumnVector.Type.INTERVAL_DAY_TIME; - - case FLOAT: - case DOUBLE: - return ColumnVector.Type.DOUBLE; - - case STRING: - case CHAR: - case VARCHAR: - case BINARY: - return ColumnVector.Type.BYTES; - - case DECIMAL: - if (dataTypePhysicalVariation != null && - dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { - return ColumnVector.Type.DECIMAL_64; - } else { - return ColumnVector.Type.DECIMAL; - } - - case VOID: - return ColumnVector.Type.VOID; - - default: - throw new HiveException("Unexpected primitive type category " + primitiveCategory); - } - } - default: - throw new HiveException("Unexpected type category " + - typeInfo.getCategory()); - } - } - - public int firstOutputColumnIndex() { - return firstOutputColumnIndex; - } - - public String[] getScratchColumnTypeNames() { - String[] result = new String[ocm.outputColCount]; - for (int i = 0; i < ocm.outputColCount; i++) { - String vectorTypeName = ocm.scratchVectorTypeNames[i]; - String typeName; - if (vectorTypeName.equalsIgnoreCase("bytes")) { - // Use hive type name. - typeName = "string"; - } else if (vectorTypeName.equalsIgnoreCase("long")) { - // Use hive type name. 
- typeName = "bigint"; - } else { - typeName = vectorTypeName; - } - result[i] = typeName; - } - return result; - } - - public DataTypePhysicalVariation[] getScratchDataTypePhysicalVariations() { - return Arrays.copyOf(ocm.scratchDataTypePhysicalVariations, ocm.outputColCount); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(32); - sb.append("Context name ").append(contextName).append(", level " + level + ", "); - - Comparator comparerInteger = new Comparator() { - @Override - public int compare(Integer o1, Integer o2) { - return o1.compareTo(o2); - }}; - - Map sortedColumnMap = new TreeMap(comparerInteger); - for (Map.Entry entry : projectionColumnMap.entrySet()) { - sortedColumnMap.put(entry.getValue(), entry.getKey()); - } - sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", "); - - sb.append("initial column names ").append(initialColumnNames.toString()).append(","); - sb.append("initial type infos ").append(initialTypeInfos.toString()).append(", "); - - sb.append("scratchColumnTypeNames ").append(Arrays.toString(getScratchColumnTypeNames())); - - return sb.toString(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java deleted file mode 100644 index 8232e67..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; - -// cast string group to string (varchar to string, etc.) 
-public class CastStringGroupToString extends StringUnaryUDFDirect { - - private static final long serialVersionUID = 1L; - - public CastStringGroupToString() { - super(); - } - - public CastStringGroupToString(int inputColumn, int outputColumnNum) { - super(inputColumn, outputColumnNum); - } - - @Override - protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) { - outV.setVal(i, vector[i], start[i], length[i]); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java index 00e529d..75e60eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java @@ -66,53 +66,157 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { outputVector.init(); - outputVector.noNulls = false; outputVector.isRepeating = false; + final int limit = inputColumns.length; LongColumnVector inputIndexVector = (LongColumnVector) batch.cols[inputColumns[0]]; + boolean[] inputIndexIsNull = inputIndexVector.isNull; long[] indexVector = inputIndexVector.vector; if (inputIndexVector.isRepeating) { - int index = (int)indexVector[0]; - if (index > 0 && index < inputColumns.length) { - BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; - if (cv.isRepeating) { - outputVector.setElement(0, 0, cv); - outputVector.isRepeating = true; - } else if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - outputVector.setVal(i, cv.vector[0], cv.start[0], cv.length[0]); + if (inputIndexVector.noNulls || !inputIndexIsNull[0]) { + int repeatedIndex = (int) indexVector[0]; + if (repeatedIndex > 0 && repeatedIndex < limit) { + BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[repeatedIndex]]; + if (cv.isRepeating) { + outputVector.isNull[0] = false; + outputVector.setElement(0, 0, cv); + outputVector.isRepeating = true; + } else if (cv.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); + } + } else { + for (int i = 0; i != n; i++) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); + } + } + } else { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (!cv.isNull[i]) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } + } else { + for (int i = 0; i != n; i++) { + if (!cv.isNull[i]) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } + } } } else { - for (int i = 0; i != n; i++) { - outputVector.setVal(i, cv.vector[0], cv.start[0], cv.length[0]); - } + outputVector.isNull[0] = true; + outputVector.noNulls = false; + outputVector.isRepeating = true; } } else { outputVector.isNull[0] = true; + outputVector.noNulls = false; outputVector.isRepeating = true; } - } else if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { - int i = sel[j]; - int index = (int)indexVector[i]; - if (index > 0 && index < inputColumns.length) { - BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; - int cvi = 
cv.isRepeating ? 0 : i; - outputVector.setVal(i, cv.vector[cvi], cv.start[cvi], cv.length[cvi]); - } else { - outputVector.isNull[i] = true; + return; + } + + if (inputIndexVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + int index = (int) indexVector[i]; + if (index > 0 && index < limit) { + BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; + int adjusted = cv.isRepeating ? 0 : i; + if (!cv.isNull[adjusted]) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[adjusted], cv.start[adjusted], cv.length[adjusted]); + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } + } else { + for (int i = 0; i != n; i++) { + int index = (int) indexVector[i]; + if (index > 0 && index < limit) { + BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; + int adjusted = cv.isRepeating ? 0 : i; + if (!cv.isNull[adjusted]) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[adjusted], cv.start[adjusted], cv.length[adjusted]); + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } } } } else { - for (int i = 0; i != n; i++) { - int index = (int)indexVector[i]; - if (index > 0 && index < inputColumns.length) { - BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; - int cvi = cv.isRepeating ? 0 : i; - outputVector.setVal(i, cv.vector[cvi], cv.start[cvi], cv.length[cvi]); - } else { - outputVector.isNull[i] = true; + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (!inputIndexVector.isNull[i]) { + int index = (int) indexVector[i]; + if (index > 0 && index < limit) { + BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; + int adjusted = cv.isRepeating ? 0 : i; + if (cv.noNulls || !cv.isNull[adjusted]) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[adjusted], cv.start[adjusted], cv.length[adjusted]); + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } + } else { + for (int i = 0; i != n; i++) { + if (!inputIndexVector.isNull[i]) { + int index = (int) indexVector[i]; + if (index > 0 && index < limit) { + BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; + int adjusted = cv.isRepeating ? 
0 : i; + if (cv.noNulls || !cv.isNull[adjusted]) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[adjusted], cv.start[adjusted], cv.length[adjusted]); + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } + } else { + outputVector.isNull[i] = true; + outputVector.noNulls = false; + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index 6a87927..7829b22 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -67,6 +67,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.Text; @@ -601,6 +602,18 @@ public static VectorExpressionWriter genVectorExpressionWritable(ExprNodeDesc no } /** + * Compiles the appropriate vector expression writer based on the output type of a vector expression (VectorExpression). + */ + public static VectorExpressionWriter genVectorExpressionWritable(VectorExpression vecExpr) + throws HiveException { + TypeInfo outputTypeInfo = vecExpr.getOutputTypeInfo(); + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + outputTypeInfo); + return genVectorExpressionWritable(objectInspector); + } + + /** * Specialized writer for ListColumnVector. Will throw cast exception * if the wrong vector column is used. */ @@ -1746,6 +1759,19 @@ public Object initValue(Object ost) throws HiveException { } /** + * Helper function to create an array of writers from an array of vector expressions.
+ */ + public static VectorExpressionWriter[] getExpressionWriters(VectorExpression[] vecExprs) + throws HiveException { + VectorExpressionWriter[] writers = new VectorExpressionWriter[vecExprs.length]; + for(int i=0; i<vecExprs.length; i++) { + writers[i] = genVectorExpressionWritable(vecExprs[i]); + } + return writers; + } ... diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ... + private String getDifferenceInfo(Object actualRow, Object expectedRow) { + if (actualRow instanceof List) { + List actualList = (List) actualRow; + final int actualSize = actualList.size(); + List expectedList = (List) expectedRow; + final int expectedSize = expectedList.size(); + if (actualSize != expectedSize) { + return "Actual size " + actualSize + ", expected size " + expectedSize; + } + for (int i = 0; i < actualSize; i++) { + Object actualObject = actualList.get(i); + Object expectedObject = expectedList.get(i); + if (!actualObject.equals(expectedObject)) { + return "Column " + i + " is different"; + } + } + } else { + if (!actualRow.equals(expectedRow)) { + return "Object is different"; + } + } + return "Actual and expected row are the same"; + } + + private String getObjectDisplayString(Object object) { + StringBuilder sb = new StringBuilder(); + + if (object == null) { + sb.append("NULL"); + } else if (object instanceof Text || + object instanceof HiveChar || object instanceof HiveCharWritable || + object instanceof HiveVarchar || object instanceof HiveVarcharWritable) { + final String string; + if (object instanceof Text) { + Text text = (Text) object; + string = text.toString(); + } else if (object instanceof HiveChar) { + HiveChar hiveChar = (HiveChar) object; + string = hiveChar.getStrippedValue(); + } else if (object instanceof HiveCharWritable) { + HiveChar hiveChar = ((HiveCharWritable) object).getHiveChar(); + string = hiveChar.getStrippedValue(); + } else if (object instanceof HiveVarchar) { + HiveVarchar hiveVarchar = (HiveVarchar) object; + string = hiveVarchar.getValue(); + } else if (object instanceof HiveVarcharWritable) { + HiveVarchar hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar(); + string = hiveVarchar.getValue(); + } else { + throw new RuntimeException("Unexpected"); + } + + byte[] bytes = string.getBytes(); + final int byteLength = bytes.length; + + sb.append("'"); + sb.append(string); + sb.append("' (byte length "); + sb.append(bytes.length); + sb.append(", string length "); + sb.append(string.length()); + sb.append(", bytes "); + sb.append(VectorizedBatchUtil.displayBytes(bytes, 0, byteLength)); + sb.append(")"); + } else { + sb.append(object.toString()); + } + return sb.toString(); + } + + private String getRowDisplayString(Object row) { + StringBuilder sb = new StringBuilder(); + if (row instanceof List) { + List list = (List) row; + final int size = list.size(); + boolean isFirst = true; + for (int i = 0; i < size; i++) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + Object object = list.get(i); + sb.append(getObjectDisplayString(object)); + } + } else { + sb.append(getObjectDisplayString(row)); + } + return sb.toString(); + } + void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, - TypeInfo[] typeInfos, Object[][] randomRows, int firstRandomRowIndex ) { + TypeInfo[] typeInfos, Object[][] randomRows, int firstRandomRowIndex, + String title) { int rowSize = vectorExtractRow.getCount(); Object[] row = new Object[rowSize]; @@ -228,9 +329,15 @@ void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, " batch index " + i + " firstRandomRowIndex " + firstRandomRowIndex); } if (!rowObj.equals(expectedObj)) { + String actualValueString = getRowDisplayString(rowObj); + String expectedValueString = getRowDisplayString(expectedObj); + String differentInfoString =
getDifferenceInfo(row, expectedObj); fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch (" + - typeInfos[c].getCategory() + " actual value " + rowObj + - " and expected value " + expectedObj + ")"); + typeInfos[c].getCategory() + " actual value '" + actualValueString + "'" + + " and expected value '" + expectedValueString + "')" + + " difference info " + differentInfoString + + " typeInfos " + Arrays.toString(typeInfos) + + " title " + title); } } } @@ -283,19 +390,27 @@ void testVectorDeserializeRow( throws HiveException, IOException, SerDeException { for (int i = 0; i < 20; i++) { - innerTestVectorDeserializeRow(r, serializationType, alternate1, alternate2, useExternalBuffer); + innerTestVectorDeserializeRow( + r, i, serializationType, alternate1, alternate2, useExternalBuffer); } } void innerTestVectorDeserializeRow( - Random r, SerializationType serializationType, + Random r, int iteration, + SerializationType serializationType, boolean alternate1, boolean alternate2, boolean useExternalBuffer) throws HiveException, IOException, SerDeException { + String title = "serializationType: " + serializationType + ", iteration " + iteration; + String[] emptyScratchTypeNames = new String[0]; VectorRandomRowSource source = new VectorRandomRowSource(); - source.init(r, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + + // FUTURE: try NULLs and UNICODE. + source.init( + r, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNull */ false, /* isUnicodeOk */ false); VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames); @@ -426,13 +541,17 @@ void innerTestVectorDeserializeRow( } batch.size++; if (batch.size == batch.DEFAULT_SIZE) { - examineBatch(batch, vectorExtractRow, typeInfos, randomRows, firstRandomRowIndex); + examineBatch( + batch, vectorExtractRow, typeInfos, randomRows, firstRandomRowIndex, + title); firstRandomRowIndex = i + 1; batch.reset(); } } if (batch.size > 0) { - examineBatch(batch, vectorExtractRow, typeInfos, randomRows, firstRandomRowIndex); + examineBatch( + batch, vectorExtractRow, typeInfos, randomRows, firstRandomRowIndex, + title); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index af73ee6..b84273a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -129,6 +129,7 @@ private String[] alphabets; private boolean allowNull; + private boolean isUnicodeOk; private boolean addEscapables; private String needsEscapeStr; @@ -289,26 +290,28 @@ public StructObjectInspector partialRowStructObjectInspector(int partialFieldCou ALL, PRIMITIVES, ALL_EXCEPT_MAP } - public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth) { - init(r, supportedTypes, maxComplexDepth, true); - } - - public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth, boolean allowNull) { + public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth, boolean allowNull, + boolean isUnicodeOk) { this.r = r; this.allowNull = allowNull; + this.isUnicodeOk = isUnicodeOk; chooseSchema(supportedTypes, null, null, null, maxComplexDepth); } - public void init(Random r, Set allowedTypeNameSet, int maxComplexDepth, boolean allowNull) { + public void init(Random r, Set allowedTypeNameSet, int maxComplexDepth, boolean
allowNull, + boolean isUnicodeOk) { this.r = r; this.allowNull = allowNull; + this.isUnicodeOk = isUnicodeOk; chooseSchema(SupportedTypes.ALL, allowedTypeNameSet, null, null, maxComplexDepth); } public void initExplicitSchema(Random r, List explicitTypeNameList, int maxComplexDepth, - boolean allowNull, List explicitDataTypePhysicalVariationList) { + boolean allowNull, boolean isUnicodeOk, + List explicitDataTypePhysicalVariationList) { this.r = r; this.allowNull = allowNull; + this.isUnicodeOk = isUnicodeOk; List generationSpecList = new ArrayList(); for (String explicitTypeName : explicitTypeNameList) { @@ -324,9 +327,11 @@ public void initExplicitSchema(Random r, List explicitTypeNameList, int } public void initGenerationSpecSchema(Random r, List generationSpecList, int maxComplexDepth, - boolean allowNull, List explicitDataTypePhysicalVariationList) { + boolean allowNull, boolean isUnicodeOk, + List explicitDataTypePhysicalVariationList) { this.r = r; this.allowNull = allowNull; + this.isUnicodeOk = isUnicodeOk; chooseSchema( SupportedTypes.ALL, null, generationSpecList, explicitDataTypePhysicalVariationList, maxComplexDepth); @@ -1009,9 +1014,19 @@ public static Object randomStringFamily(Random random, TypeInfo typeInfo, PrimitiveTypeInfo[] primitiveTypeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations) { + return randomPrimitiveRow( + columnCount, r, primitiveTypeInfos, dataTypePhysicalVariations, false); + } + + public static Object[] randomPrimitiveRow(int columnCount, Random r, + PrimitiveTypeInfo[] primitiveTypeInfos, + DataTypePhysicalVariation[] dataTypePhysicalVariations, boolean isUnicodeOk) { + final Object row[] = new Object[columnCount]; for (int c = 0; c < columnCount; c++) { - row[c] = randomPrimitiveObject(r, primitiveTypeInfos[c], dataTypePhysicalVariations[c]); + row[c] = + randomPrimitiveObject( + r, primitiveTypeInfos[c], dataTypePhysicalVariations[c], isUnicodeOk); } return row; } @@ -1624,11 +1639,11 @@ public Object randomPrimitiveObject(int column) { } public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo) { - return randomPrimitiveObject(r, primitiveTypeInfo, DataTypePhysicalVariation.NONE); + return randomPrimitiveObject(r, primitiveTypeInfo, DataTypePhysicalVariation.NONE, false); } public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo, - DataTypePhysicalVariation dataTypePhysicalVariation) { + DataTypePhysicalVariation dataTypePhysicalVariation, boolean isUnicodeOk) { switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: @@ -1648,11 +1663,11 @@ public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitive case DOUBLE: return Double.valueOf(r.nextDouble() * 10 - 5); case STRING: - return RandomTypeUtil.getRandString(r); + return getRandString(r, isUnicodeOk); case CHAR: - return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); + return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo, isUnicodeOk); case VARCHAR: - return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); + return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo, isUnicodeOk); case BINARY: return getRandBinary(r, 1 + r.nextInt(100)); case TIMESTAMP: @@ -1682,22 +1697,30 @@ public static String randomPrimitiveTimestampStringObject(Random r) { return RandomTypeUtil.getRandTimestamp(r).toString(); } - public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { + public static String getRandString(Random r, boolean isUnicodeOk) { + return 
getRandString(r, r.nextInt(10), isUnicodeOk); + } + + public static String getRandString(Random r, int length, boolean isUnicodeOk) { + return + !isUnicodeOk || r.nextBoolean() ? + RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", length) : + RandomTypeUtil.getRandUnicodeString(r, length); + } + + public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo, boolean isUnicodeOk) { final int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); - final String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + final String randomString = getRandString(r, 100, isUnicodeOk); return new HiveChar(randomString, maxLength); } - public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo, String alphabet) { + public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo, + boolean isUnicodeOk) { final int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength()); - final String randomString = RandomTypeUtil.getRandString(r, alphabet, 100); + final String randomString = getRandString(r, 100, isUnicodeOk); return new HiveVarchar(randomString, maxLength); } - public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) { - return getRandHiveVarchar(r, varcharTypeInfo, "abcdefghijklmnopqrstuvwxyz"); - } - public static byte[] getRandBinary(Random r, int len){ final byte[] bytes = new byte[len]; for (int j = 0; j < len; j++){ diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorVerifyFast.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorVerifyFast.java index 458aae8..a0ba0e1 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorVerifyFast.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorVerifyFast.java @@ -364,9 +364,9 @@ public static void serializeWrite(SerializeWrite serializeWrite, case STRING: { Text value = (Text) object; - byte[] stringBytes = value.getBytes(); - int stringLength = stringBytes.length; - serializeWrite.writeString(stringBytes, 0, stringLength); + byte[] bytes = value.getBytes(); + int byteLength = value.getLength(); + serializeWrite.writeString(bytes, 0, byteLength); } break; case CHAR: diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/aggregation/TestVectorAggregation.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/aggregation/TestVectorAggregation.java index d4ed6b5..211eaa2 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/aggregation/TestVectorAggregation.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/aggregation/TestVectorAggregation.java @@ -369,7 +369,8 @@ private void doMerge(GenericUDAFEvaluator.Mode mergeUdafEvaluatorMode, VectorRandomRowSource mergeRowSource = new VectorRandomRowSource(); mergeRowSource.initGenerationSpecSchema( - random, mergeAggrGenerationSpecList, /* maxComplexDepth */ 0, /* allowNull */ false, + random, mergeAggrGenerationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ false, /* isUnicodeOk */ true, mergeDataTypePhysicalVariationList); Object[][] mergeRandomRows = mergeRowSource.randomRows(TEST_ROW_COUNT); @@ -508,7 +509,8 @@ private void doTests(Random random, String aggregationName, TypeInfo typeInfo, boolean allowNull = !aggregationName.equals("bloom_filter"); partial1RowSource.initGenerationSpecSchema( - random, dataAggrGenerationSpecList, /* maxComplexDepth */ 0, allowNull, + random, dataAggrGenerationSpecList, /* maxComplexDepth */ 0, + allowNull, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); 
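// Illustrative note on isUnicodeOk (sample output hypothetical): when it is
// true, getRandString picks randomly between the lowercase ASCII alphabet and
// random Unicode data, roughly half and half:
//
//   getRandString(r, 10, /* isUnicodeOk */ false) -> e.g. "qzkcbfaypd"
//   getRandString(r, 10, /* isUnicodeOk */ true)  -> ASCII or Unicode string
//
// so tests passing isUnicodeOk == true now exercise multi-byte UTF-8 values.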
Object[][] partial1RandomRows = partial1RowSource.randomRows(TEST_ROW_COUNT); @@ -604,7 +606,8 @@ private void doTests(Random random, String aggregationName, TypeInfo typeInfo, VectorRandomRowSource completeRowSource = new VectorRandomRowSource(); completeRowSource.initGenerationSpecSchema( - random, dataAggrGenerationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, dataAggrGenerationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); Object[][] completeRandomRows = completeRowSource.randomRows(TEST_ROW_COUNT); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java index 1b61071..1329d79 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java @@ -26,9 +26,7 @@ import java.util.Random; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; @@ -59,25 +57,16 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMod; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMultiply; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import junit.framework.Assert; @@ -437,7 +426,8 @@ private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, Type VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java index 5b69bdf..16bb445 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java @@ -453,7 +453,8 @@ private boolean doBetweenInVariation(Random random, String typeName, VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); List columns = new ArrayList(); @@ -575,7 +576,8 @@ private boolean doBetweenStructInVariation(Random random, String structTypeName, VectorRandomRowSource structRowSource = new VectorRandomRowSource(); structRowSource.initGenerationSpecSchema( - random, structGenerationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, structGenerationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, structExplicitDataTypePhysicalVariationList); Object[][] structRandomRows = structRowSource.randomRows(100000); @@ -597,7 +599,8 @@ private boolean doBetweenStructInVariation(Random random, String structTypeName, VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); @@ -729,7 +732,7 @@ private boolean executeTestModesAndVerify(TypeInfo typeInfo, continue; } case VECTOR_EXPRESSION: - if (!doVectorCastTest( + if (!doVectorBetweenInTest( typeInfo, betweenInVariation, compareList, @@ -866,7 +869,7 @@ private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, } } - private boolean doVectorCastTest(TypeInfo typeInfo, + private boolean doVectorBetweenInTest(TypeInfo typeInfo, BetweenInVariation betweenInVariation, List compareList, List columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, @@ -899,13 +902,24 @@ private boolean doVectorCastTest(TypeInfo typeInfo, VectorExpressionDescriptor.Mode.PROJECTION)); vectorExpression.transientInit(); - if (betweenInTestMode == BetweenInTestMode.VECTOR_EXPRESSION && - vectorExpression instanceof VectorUDFAdaptor) { - System.out.println( - "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + - " betweenInTestMode " + betweenInTestMode + - " betweenInVariation " + betweenInVariation + - " vectorExpression " + vectorExpression.toString()); + if (betweenInTestMode == BetweenInTestMode.VECTOR_EXPRESSION) { + String vecExprString = vectorExpression.toString(); + if (vectorExpression instanceof VectorUDFAdaptor) { + System.out.println( + "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + + " betweenInTestMode " + betweenInTestMode + + " betweenInVariation " + betweenInVariation + + " vectorExpression " + vecExprString); + } else if (dataTypePhysicalVariations[0] == DataTypePhysicalVariation.DECIMAL_64) { + final String nameToCheck = vectorExpression.getClass().getSimpleName(); + if (!nameToCheck.contains("Decimal64")) { + System.out.println( + "*EXPECTED DECIMAL_64 VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + + " 
betweenInTestMode " + betweenInTestMode + + " betweenInVariation " + betweenInVariation + + " vectorExpression " + vecExprString); + } + } } // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java index cc1415a..8a68506 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java @@ -24,8 +24,6 @@ import java.util.Random; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; @@ -48,23 +46,17 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; import junit.framework.Assert; @@ -286,7 +278,8 @@ private void doIfTestOneCast(Random random, String typeName, VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); List columns = new ArrayList(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java index 0bca490..d367fb9 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java @@ -54,6 +54,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.IntWritable; import junit.framework.Assert; @@ -66,7 +67,11 @@ public void testCoalesce() throws Exception { Random random = new 
Random(5371); - doCoalesceElt(random, /* isCoalesce */ true, false); + // Grind through a few more index values... + int iteration = 0; + for (int i = 0; i < 10; i++) { + iteration = doCoalesceElt(random, iteration, /* isCoalesce */ true, false); + } } @Test @@ -74,9 +79,10 @@ public void testElt() throws Exception { Random random = new Random(5371); // Grind through a few more index values... - for (int i = 0; i < 4; i++) { - doCoalesceElt(random, /* isCoalesce */ false, false); - doCoalesceElt(random, /* isCoalesce */ false, true); + int iteration = 0; + for (int i = 0; i < 10; i++) { + iteration = doCoalesceElt(random, iteration, /* isCoalesce */ false, false); + iteration = doCoalesceElt(random, iteration, /* isCoalesce */ false, true); } } @@ -88,39 +94,41 @@ public void testElt() throws Exception { static final int count = values().length; } - private void doCoalesceElt(Random random, boolean isCoalesce, boolean isEltIndexConst) - throws Exception { + private int doCoalesceElt(Random random, int iteration, boolean isCoalesce, + boolean isEltIndexConst) + throws Exception { - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 2, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 2, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ false); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0 }, /* nullConstantColumns */ null, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0 }, /* nullConstantColumns */ new int[] { 0 }, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 1 }, /* nullConstantColumns */ null, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 1 }, /* nullConstantColumns */ new int[] { 1 }, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0, 2 }, /* nullConstantColumns */ null, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0, 2 }, /* nullConstantColumns */ new int[] { 0 }, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, 
isCoalesce, isEltIndexConst, /* columnCount */ 3, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0, 2 }, /* nullConstantColumns */ new int[] { 0, 2 }, /* allowNulls */ false); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 4, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 4, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ false); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 4, new int[] { 0, 1, 2 }, /* nullConstantColumns */ new int[] { 0, 1, 2 }, /* allowNulls */ true); - doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 4, + doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 4, new int[] { 0, 1, 2 }, /* nullConstantColumns */ new int[] { 0, 1, 2 }, /* allowNulls */ false); + return iteration; } private boolean contains(int[] columns, int column) { @@ -135,7 +143,7 @@ private boolean contains(int[] columns, int column) { return false; } - private boolean doCoalesceOnRandomDataType(Random random, + private boolean doCoalesceOnRandomDataType(Random random, int iteration, boolean isCoalesce, boolean isEltIndexConst, int columnCount, int[] constantColumns, int[] nullConstantColumns, boolean allowNulls) throws Exception { @@ -187,17 +195,18 @@ private boolean doCoalesceOnRandomDataType(Random random, List intValueList = new ArrayList(); for (int i = -1; i < columnCount + 2; i++) { - intValueList.add(i); + intValueList.add( + new IntWritable(i)); } final int intValueListCount = intValueList.size(); - ExprNodeDesc colExpr; + ExprNodeDesc intColExpr; if (!isEltIndexConst) { generationSpecList.add( GenerationSpec.createValueList(intTypeInfo, intValueList)); explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); String columnName = "col" + columnNum++; columns.add(columnName); - colExpr = new ExprNodeColumnDesc(intTypeInfo, columnName, "table", false); + intColExpr = new ExprNodeColumnDesc(intTypeInfo, columnName, "table", false); } else { final Object scalarObject; if (random.nextInt(10) != 0) { @@ -205,8 +214,9 @@ private boolean doCoalesceOnRandomDataType(Random random, } else { scalarObject = null; } - colExpr = new ExprNodeConstantDesc(typeInfo, scalarObject); + intColExpr = new ExprNodeConstantDesc(typeInfo, scalarObject); } + children.add(intColExpr); } for (int c = 0; c < columnCount; c++) { ExprNodeDesc colExpr; @@ -235,7 +245,8 @@ private boolean doCoalesceOnRandomDataType(Random random, VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ allowNulls, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ allowNulls, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); String[] columnNames = columns.toArray(new String[0]); @@ -295,6 +306,7 @@ private boolean doCoalesceOnRandomDataType(Random random, case VECTOR_EXPRESSION: if (!doVectorCastTest( typeInfo, + iteration, 
columns, columnNames, rowSource.typeInfos(), @@ -327,9 +339,10 @@ private boolean doCoalesceOnRandomDataType(Random random, "Row " + i + " sourceTypeName " + typeName + " " + coalesceEltTestMode + + " iteration " + iteration + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + " does not match row-mode expected result is NULL " + - (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) + + (expectedResult == null ? "YES" : "NO result '" + expectedResult.toString() + "'") + " row values " + Arrays.toString(randomRows[i]) + " exprDesc " + exprDesc.toString()); } @@ -340,9 +353,10 @@ private boolean doCoalesceOnRandomDataType(Random random, "Row " + i + " sourceTypeName " + typeName + " " + coalesceEltTestMode + - " result " + vectorResult.toString() + + " iteration " + iteration + + " result '" + vectorResult.toString() + "'" + " (" + vectorResult.getClass().getSimpleName() + ")" + - " does not match row-mode expected result " + expectedResult.toString() + + " does not match row-mode expected result '" + expectedResult.toString() + "'" + " (" + expectedResult.getClass().getSimpleName() + ")" + " row values " + Arrays.toString(randomRows[i]) + " exprDesc " + exprDesc.toString()); @@ -410,7 +424,7 @@ private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, } } - private boolean doVectorCastTest(TypeInfo typeInfo, + private boolean doVectorCastTest(TypeInfo typeInfo, int iteration, List columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List children, @@ -445,7 +459,7 @@ private boolean doVectorCastTest(TypeInfo typeInfo, " vectorExpression " + vectorExpression.toString()); } - System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); + // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); /* System.out.println( "*DEBUG* typeInfo1 " + typeInfo1.toString() + " typeInfo2 " + typeInfo2.toString() + " arithmeticTestMode " + arithmeticTestMode + " columnScalarMode " + columnScalarMode + " vectorExpression " + vectorExpression.toString()); */ @@ -474,17 +488,6 @@ private boolean doVectorCastTest(TypeInfo typeInfo, new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() }); Object[] scrqtchRow = new Object[1]; - // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); - - /* - System.out.println( - "*DEBUG* typeInfo1 " + typeInfo1.toString() + - " typeInfo2 " + typeInfo2.toString() + - " arithmeticTestMode " + arithmeticTestMode + - " columnScalarMode " + columnScalarMode + - " vectorExpression " + vectorExpression.toString()); - */ - batchSource.resetBatchIteration(); int rowIndex = 0; while (true) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java index 68c14c8..b0e4b26 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java @@ -25,8 +25,6 @@ import java.util.Random; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; @@ -46,23 +44,18 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import junit.framework.Assert; @@ -242,7 +235,8 @@ private void doDateAddSubTestsWithDiffColumnScalar(Random random, String dateTim VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java index 0da9d8c..d89299c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java @@ -25,8 +25,6 @@ import java.util.Random; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; @@ -54,16 +52,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import junit.framework.Assert; @@ -251,7 +245,8 @@ private void doDateDiffTestsWithDiffColumnScalar(Random random, String dateTimeS VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, 
generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterCompare.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterCompare.java index ba9eaca..abdbc1c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterCompare.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterCompare.java @@ -26,9 +26,7 @@ import java.util.Random; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; @@ -60,12 +58,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; @@ -77,9 +71,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.BooleanWritable; import junit.framework.Assert; @@ -402,7 +393,8 @@ private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, Type VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java index d8ae175..ce66e2b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java @@ -273,7 +273,8 @@ private void doIfTestsWithDiffColumnScalar(Random random, String typeName, VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initExplicitSchema( - random, explicitTypeNameList, /* maxComplexDepth */ 0, /* 
allowNull */ true, + random, explicitTypeNameList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); List columns = new ArrayList(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java index 648feb0..9a49088 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIndex.java @@ -28,8 +28,6 @@ import java.util.stream.IntStream; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; @@ -306,7 +304,8 @@ private boolean doIndexOnRandomDataType(Random random, VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ allowNulls, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ allowNulls, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); String[] columnNames = columns.toArray(new String[0]); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java index ea39848..a3f665b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java @@ -51,34 +51,16 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateDiff; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPDivide; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMinus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMod; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMultiply; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import junit.framework.Assert; @@ -219,7 +201,8 @@ private void doTests(Random random, TypeInfo typeInfo) VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java index 9b0a2ae..06d8fc8 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java @@ -24,8 +24,6 @@ import java.util.Random; import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; @@ -52,26 +50,17 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; import junit.framework.Assert; @@ -159,7 +148,8 @@ private boolean doIsNullOnRandomDataType(Random random, String functionName, boo VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); List columns 
= new ArrayList(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java index 8877b06..bb41c4f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java @@ -191,7 +191,8 @@ private void doStringConcatTestsWithDiffColumnScalar(Random random, VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java index dd53157..4099b45 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java @@ -149,7 +149,8 @@ private void doTests(Random random, String typeName, String functionName) VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); List children = new ArrayList(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java new file mode 100644 index 0000000..5062997 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.lang.reflect.Constructor; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorStructField { + + @Test + public void testStructField() throws Exception { + Random random = new Random(7743); + + for (int i = 0; i < 5; i++) { + doStructFieldTests(random); + } + } + + public enum StructFieldTestMode { + ROW_MODE, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + private void doStructFieldTests(Random random) throws Exception { + String structTypeName = + VectorRandomRowSource.getDecoratedTypeName( + random, "struct", SupportedTypes.ALL, /* allowedTypeNameSet */ null, + /* depth */ 0, /* maxDepth */ 2); + StructTypeInfo structTypeInfo = + (StructTypeInfo) 
TypeInfoUtils.getTypeInfoFromTypeString(structTypeName); + + List fieldNameList = structTypeInfo.getAllStructFieldNames(); + final int fieldCount = fieldNameList.size(); + for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) { + doOneStructFieldTest(random, structTypeInfo, structTypeName, fieldIndex); + } + } + + private void doOneStructFieldTest(Random random, StructTypeInfo structTypeInfo, + String structTypeName, int fieldIndex) + throws Exception { + + List generationSpecList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + + List columns = new ArrayList(); + int columnNum = 1; + + generationSpecList.add( + GenerationSpec.createSameType(structTypeInfo)); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + ExprNodeDesc col1Expr; + String columnName = "col" + (columnNum++); + col1Expr = new ExprNodeColumnDesc(structTypeInfo, columnName, "table", false); + columns.add(columnName); + + ObjectInspector structObjectInspector = + VectorRandomRowSource.getObjectInspector(structTypeInfo); + List objectInspectorList = new ArrayList(); + objectInspectorList.add(structObjectInspector); + + List children = new ArrayList(); + children.add(col1Expr); + + //---------------------------------------------------------------------------------------------- + + String[] columnNames = columns.toArray(new String[0]); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, + explicitDataTypePhysicalVariationList); + + Object[][] randomRows = rowSource.randomRows(100000); + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + List fieldNameList = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos(); + + String randomFieldName = fieldNameList.get(fieldIndex); + TypeInfo outputTypeInfo = fieldTypeInfoList.get(fieldIndex); + + ExprNodeFieldDesc exprNodeFieldDesc = + new ExprNodeFieldDesc(outputTypeInfo, col1Expr, randomFieldName, /* isList */ false); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[StructFieldTestMode.count][]; + for (int i = 0; i < StructFieldTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + StructFieldTestMode negativeTestMode = StructFieldTestMode.values()[i]; + switch (negativeTestMode) { + case ROW_MODE: + doRowStructFieldTest( + structTypeInfo, + columns, + children, + exprNodeFieldDesc, + randomRows, + rowSource.rowStructObjectInspector(), + outputTypeInfo, + resultObjects); + break; + case VECTOR_EXPRESSION: + doVectorStructFieldTest( + structTypeInfo, + columns, + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + children, + exprNodeFieldDesc, + negativeTestMode, + batchSource, + exprNodeFieldDesc.getWritableObjectInspector(), + outputTypeInfo, + resultObjects); + break; + default: + throw new RuntimeException("Unexpected struct field test mode " + negativeTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value. 
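+ // Each vectorized mode (index v >= 1) must match that row-mode oracle exactly, including agreement on NULL results.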
+ Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < StructFieldTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + + " structTypeName " + structTypeName + + " outputTypeName " + outputTypeInfo.getTypeName() + + " " + StructFieldTestMode.values()[v] + + " result is NULL " + (vectorResult == null) + + " does not match row-mode expected result is NULL " + (expectedResult == null) + + " row values " + Arrays.toString(randomRows[i])); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + + " structTypeName " + structTypeName + + " outputTypeName " + outputTypeInfo.getTypeName() + + " " + StructFieldTestMode.values()[v] + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i])); + } + } + } + } + } + + private void doRowStructFieldTest(TypeInfo typeInfo, + List columns, List children, + ExprNodeFieldDesc exprNodeFieldDesc, + Object[][] randomRows, + ObjectInspector rowInspector, + TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception { + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " negativeTestMode ROW_MODE" + + " exprDesc " + exprDesc.toString()); + */ + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprNodeFieldDesc, hiveConf); + evaluator.initialize(rowInspector); + + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + outputTypeInfo); + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + // Let any conversion failure propagate and fail the test; swallowing it here + // would silently turn an exception into a NULL result. + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, + ObjectInspector objectInspector, Object[] resultObjects) { + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? 
selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + } + + private void doVectorStructFieldTest(TypeInfo typeInfo, + List columns, + String[] columnNames, + TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, + List children, + ExprNodeFieldDesc exprNodeFieldDesc, + StructFieldTestMode negativeTestMode, + VectorRandomBatchSource batchSource, + ObjectInspector objectInspector, + TypeInfo outputTypeInfo, Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = + vectorizationContext.getVectorExpression(exprNodeFieldDesc); + vectorExpression.transientInit(); + + if (negativeTestMode == StructFieldTestMode.VECTOR_EXPRESSION && + vectorExpression instanceof VectorUDFAdaptor) { + System.out.println( + "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + + " negativeTestMode " + negativeTestMode + + " vectorExpression " + vectorExpression.toString()); + } + + String[] outputScratchTypeNames= vectorizationContext.getScratchColumnTypeNames(); + + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + typeInfos, + dataTypePhysicalVariations, + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + outputScratchTypeNames, + null); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); + resultVectorExtractRow.init( + new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() }); + Object[] scrqtchRow = new Object[1]; + + // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " negativeTestMode " + negativeTestMode + + " vectorExpression " + vectorExpression.toString()); + */ + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + vectorExpression.evaluate(batch); + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, + objectInspector, resultObjects); + rowIndex += batch.size; + } + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java index a978782..2997dcd 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java @@ -118,7 +118,8 @@ private void doTests(Random random, boolean useLength) VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); List children = new ArrayList(); diff --git 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java index c31bec5..21109f3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java @@ -137,7 +137,8 @@ private void doIfTestOneTimestampExtract(Random random, String dateTimeStringTyp VectorRandomRowSource rowSource = new VectorRandomRowSource(); rowSource.initGenerationSpecSchema( - random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList); List children = new ArrayList(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java index 05a98a6..df91443 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java @@ -144,8 +144,10 @@ public void testBigIntRows() throws Exception { VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap(); VectorRandomRowSource valueSource = new VectorRandomRowSource(); - - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -170,7 +172,9 @@ public void testIntRows() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -194,8 +198,10 @@ public void testStringRows() throws Exception { VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap(); VectorRandomRowSource valueSource = new VectorRandomRowSource(); - - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -220,7 +226,9 @@ public void testMultiKeyRows1() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -245,8 +253,9 @@ public void testMultiKeyRows2() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -271,7 +280,9 @@ public void 
testMultiKeyRows3() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -296,7 +307,9 @@ public void testBigIntRowsClipped() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -321,7 +334,9 @@ public void testIntRowsClipped() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -346,7 +361,9 @@ public void testStringRowsClipped() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -371,7 +388,9 @@ public void testMultiKeyRowsClipped1() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -396,7 +415,9 @@ public void testMultiKeyRowsClipped2() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -421,7 +442,9 @@ public void testMultiKeyRowsClipped3() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -447,7 +470,9 @@ public void testBigIntRowsExact() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -472,7 +497,9 @@ public void testIntRowsExact() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + 
valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -497,7 +524,9 @@ public void testStringRowsExact() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -522,7 +551,9 @@ public void testMultiKeyRowsExact1() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -547,7 +578,9 @@ public void testMultiKeyRowsExact2() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -572,7 +605,9 @@ public void testMultiKeyRowsExact3() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -597,7 +632,9 @@ public void testBigIntRowsClippedExact() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -622,7 +659,9 @@ public void testIntRowsClippedExact() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -647,7 +686,9 @@ public void testStringRowsClippedExact() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -672,7 +713,9 @@ public void testMultiKeyRowsClippedExact1() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = 
valueSource.randomRows(rowCount); @@ -697,7 +740,9 @@ public void testMultiKeyRowsClippedExact2() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); @@ -722,7 +767,9 @@ public void testMultiKeyRowsClippedExact3() throws Exception { VectorRandomRowSource valueSource = new VectorRandomRowSource(); - valueSource.init(random, VectorRandomRowSource.SupportedTypes.ALL, 4, false); + valueSource.init( + random, VectorRandomRowSource.SupportedTypes.ALL, 4, + /* allowNulls */ false, /* isUnicodeOk */ false); int rowCount = 1000; Object[][] rows = valueSource.randomRows(rowCount); diff --git ql/src/test/queries/clientpositive/query_result_fileformat.q ql/src/test/queries/clientpositive/query_result_fileformat.q index a4c63e1..a32f25f 100644 --- ql/src/test/queries/clientpositive/query_result_fileformat.q +++ ql/src/test/queries/clientpositive/query_result_fileformat.q @@ -7,7 +7,7 @@ http://asdf' value from src limit 1; select * from nzhang_test1; select count(*) from nzhang_test1; -explain +explain vectorization detail select * from nzhang_test1 where key='key1'; select * from nzhang_test1 where key='key1'; @@ -18,7 +18,7 @@ select * from nzhang_test1; select count(*) from nzhang_test1; -explain +explain vectorization detail select * from nzhang_test1 where key='key1'; select * from nzhang_test1 where key='key1'; diff --git ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out index 3ab6547..411d693 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out @@ -524,7 +524,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 40, 42, 45, 46] - selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, 
IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 48)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 48:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 49:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 49:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 19:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 34:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 35:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 35:boolean) -> 36:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 37:boolean, col 38:timestampcol 39:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 37:boolean, CastDateToTimestamp(col 12:date) -> 38:timestamp, CastDateToTimestamp(col 11:date) -> 39:timestamp) -> 40:timestamp, IfExprColumnNull(col 37:boolean, col 41:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 37:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 41:int) -> 42:int, IfExprNullColumn(col 43:boolean, 
null, col 44)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 43:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 44:int) -> 45:int, IfExprLongScalarLongScalar(col 47:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 46:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 46:int) -> 47:boolean) -> 46:date + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 48)(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 17:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 48:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 49:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 18:boolean, 
ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 49:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 19:boolean) -> 33:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 19:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 19:boolean) -> 34:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 35:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 35:boolean) -> 36:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 37:boolean, col 38:timestampcol 39:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 37:boolean, CastDateToTimestamp(col 12:date) -> 38:timestamp, CastDateToTimestamp(col 11:date) -> 39:timestamp) -> 40:timestamp, IfExprColumnNull(col 37:boolean, col 41:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 37:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 41:int) -> 42:int, IfExprNullColumn(col 43:boolean, null, col 44)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 43:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 44:int) -> 45:int, IfExprLongScalarLongScalar(col 47:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 46:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 46:int) -> 47:boolean) -> 46:date Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -856,8 +856,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 71, 75, 78, 81, 82] - selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, 
ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 62:boolean, null, col 84)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 62:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 84:decimal(10,2)) -> 63:decimal(10,2), IfExprColumnNull(col 64:boolean, col 85:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 64:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 85:decimal(10,2)) -> 65:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 67:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 68:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 66:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 69:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 70:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: 
CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 70:boolean) -> 71:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 72:boolean, col 73:timestampcol 74:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 72:boolean, CastDateToTimestamp(col 12:date) -> 73:timestamp, CastDateToTimestamp(col 11:date) -> 74:timestamp) -> 75:timestamp, IfExprCondExprNull(col 76:boolean, col 77:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 76:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 77:int) -> 78:int, IfExprNullCondExpr(col 79:boolean, null, col 80:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 79:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 80:int) -> 81:int, IfExprLongScalarLongScalar(col 83:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 82:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 82:int) -> 83:boolean) -> 82:date + projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 62, 64, 66, 67, 68, 70, 74, 77, 80, 81] + selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 
0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 61:boolean, null, col 83)(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 61:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 83:decimal(10,2)) -> 62:decimal(10,2), IfExprColumnNull(col 63:boolean, col 84:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 63:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 84:decimal(10,2)) -> 64:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 65:boolean) -> 66:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 65:boolean) -> 67:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 65:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 65:boolean) -> 68:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 69:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 69:boolean) -> 70:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 71:boolean, col 72:timestampcol 73:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 71:boolean, CastDateToTimestamp(col 12:date) -> 72:timestamp, CastDateToTimestamp(col 11:date) -> 73:timestamp) -> 74:timestamp, IfExprCondExprNull(col 75:boolean, col 76:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 75:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 76:int) -> 77:int, IfExprNullCondExpr(col 78:boolean, null, col 79:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 78:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 79:int) -> 80:int, IfExprLongScalarLongScalar(col 82:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 81:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 81:int) -> 82:boolean) -> 81:date Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -885,7 +885,7 @@ STAGE PLANS: includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14] dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2)/DECIMAL_64, l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), 
l_shipmode:char(10), l_comment:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, string, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2)/DECIMAL_64, bigint, decimal(10,2)/DECIMAL_64, bigint, timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, decimal(10,2), decimal(10,2)] + scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2)/DECIMAL_64, bigint, decimal(10,2)/DECIMAL_64, bigint, timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, decimal(10,2), decimal(10,2)] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out index 31b3807..4ae125b 100644 --- ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out @@ -514,7 +514,6 @@ STAGE PLANS: Map-reduce partition columns: CAST( _col1 AS STRING) (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyExpressions: CastStringGroupToString(col 1:char(10)) -> 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 2d043e7..5db37eb 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -71,8 +71,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [9, 10, 14] - selectExpressions: StringGroupConcatColCol(col 0:string, col 1:string) -> 9:string, StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 10:varchar(30), StringGroupColEqualStringGroupColumn(col 11:string, col 13:string)(children: StringGroupConcatColCol(col 0:string, col 1:string) -> 11:string, CastStringGroupToString(col 12:varchar(30))(children: StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 12:varchar(30)) -> 13:string) -> 14:boolean + projectedOutputColumnNums: [9, 10, 13] + selectExpressions: StringGroupConcatColCol(col 0:string, col 1:string) -> 9:string, StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 10:varchar(30), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringGroupConcatColCol(col 0:string, col 1:string) -> 
11:string, StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 12:varchar(30)) -> 13:boolean Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 @@ -106,7 +106,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 - scratchColumnTypeNames: [string, string, string, string, string, bigint] + scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator @@ -172,8 +172,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [9, 10, 14] - selectExpressions: StringUpper(col 1:string) -> 9:string, StringUpper(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 13:string)(children: StringUpper(col 1:string) -> 11:string, CastStringGroupToString(col 12:varchar(20))(children: StringUpper(col 3:varchar(20)) -> 12:varchar(20)) -> 13:string) -> 14:boolean + projectedOutputColumnNums: [9, 10, 13] + selectExpressions: StringUpper(col 1:string) -> 9:string, StringUpper(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringUpper(col 1:string) -> 11:string, StringUpper(col 3:varchar(20)) -> 12:varchar(20)) -> 13:boolean Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 @@ -207,7 +207,7 @@ STAGE PLANS: includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 - scratchColumnTypeNames: [string, string, string, string, string, bigint] + scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator @@ -273,8 +273,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [9, 10, 14] - selectExpressions: StringLower(col 1:string) -> 9:string, StringLower(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 13:string)(children: StringLower(col 1:string) -> 11:string, CastStringGroupToString(col 12:varchar(20))(children: StringLower(col 3:varchar(20)) -> 12:varchar(20)) -> 13:string) -> 14:boolean + projectedOutputColumnNums: [9, 10, 13] + selectExpressions: StringLower(col 1:string) -> 9:string, StringLower(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringLower(col 1:string) -> 11:string, StringLower(col 3:varchar(20)) -> 12:varchar(20)) -> 13:boolean Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 @@ -308,7 +308,7 @@ STAGE PLANS: includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 - scratchColumnTypeNames: [string, string, string, string, string, bigint] + scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_casts.q.out ql/src/test/results/clientpositive/llap/vectorized_casts.q.out index 6a72515..25dc151 100644 --- ql/src/test/results/clientpositive/llap/vectorized_casts.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_casts.q.out @@ -183,8 +183,8 @@ STAGE PLANS: 
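Every vector_udf1.q.out hunk above, and the vectorized_casts.q.out hunk that follows, drops a CastStringGroupToString wrapper (plus the scratch string column it consumed) because the cast input is already in the string family. A minimal, self-contained sketch of that dispatch decision; the class and method names here are hypothetical stand-ins, not Hive's actual vectorization API:

    import java.util.function.UnaryOperator;

    public class CastDispatchSketch {

        // STRING -> STRING needs no data movement, so the planner can hand
        // back the input expression itself instead of a copying cast.
        static UnaryOperator<String> castToString() {
            return UnaryOperator.identity();
        }

        // CHAR(n)/VARCHAR(n) targets still need real work because a length
        // bound applies; truncation stands in for that conversion here.
        static UnaryOperator<String> castToBoundedString(int maxLength) {
            return s -> s.length() > maxLength ? s.substring(0, maxLength) : s;
        }

        public static void main(String[] args) {
            String value = "DELIVER IN PERSON";
            System.out.println(castToString().apply(value));          // unchanged, no scratch column
            System.out.println(castToBoundedString(10).apply(value)); // "DELIVER IN"
        }
    }

The payoff visible in these plans is the shorter scratchColumnTypeNames lists: skipping the no-op cast saves one string scratch column per affected expression.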
Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73] - selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, CastStringToTimestamp(col 6:string) -> 56:timestamp, CastStringToTimestamp(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, CastTimestampToString(col 8:timestamp) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, 
CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double + projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 66, 67, 68, 69, 71, 73] + selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, CastStringToTimestamp(col 6:string) -> 56:timestamp, CastStringToTimestamp(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 
52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, CastTimestampToString(col 8:timestamp) -> 65:string, CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10), CastStringGroupToVarChar(col 6:string, maxLength 10) -> 67:varchar(10), CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 68:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 69:double, CastDoubleToString(col 70:double)(children: FuncSinDoubleToDouble(col 4:float) -> 70:double) -> 71:string, DoubleColAddDoubleColumn(col 70:double, col 72:double)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 70:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double Statistics: Num rows: 6144 Data size: 16362860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -212,7 +212,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 6, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, string, bigint, bigint, bigint, bigint, double, double, double, double, double, double, double, double, double, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, bigint, timestamp, timestamp, timestamp, timestamp, timestamp, string, string, string, string, string, string, string, string, string, string, string, double, double, double, double, double] + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, string, bigint, bigint, bigint, bigint, double, double, double, double, double, double, double, double, double, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, bigint, timestamp, timestamp, timestamp, timestamp, timestamp, string, string, string, string, string, string, string, string, string, string, double, double, double, string, double, double] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/query_result_fileformat.q.out ql/src/test/results/clientpositive/query_result_fileformat.q.out index a6d3a20..85030cf 100644 --- ql/src/test/results/clientpositive/query_result_fileformat.q.out +++ ql/src/test/results/clientpositive/query_result_fileformat.q.out @@ -37,12 +37,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_test1 #### A masked pattern was here #### 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from nzhang_test1 where key='key1' PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from nzhang_test1 where key='key1' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -55,21 +59,51 @@ STAGE PLANS: alias: nzhang_test1 filterExpr: (key = 'key1') (type: boolean) Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringGroupColEqualStringScalar(col 0:string, val key1) predicate: (key = 'key1') (type: boolean) Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 'key1' (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1] + selectExpressions: ConstantVectorExpression(val key1) -> 3:string Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Stage: Stage-0 Fetch Operator @@ -110,12 +144,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_test1 #### A masked pattern was here #### 1 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select * from nzhang_test1 where key='key1' PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select * from nzhang_test1 where key='key1' POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,21 +166,51 @@ STAGE PLANS: alias: nzhang_test1 filterExpr: (key = 'key1') (type: boolean) Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringGroupColEqualStringScalar(col 0:string, val key1) predicate: (key = 'key1') (type: boolean) Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 'key1' (type: string), value (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1] + selectExpressions: ConstantVectorExpression(val key1) -> 3:string Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_case_when_1.q.out ql/src/test/results/clientpositive/vector_case_when_1.q.out index 01fc3ce..88cba90 100644 --- ql/src/test/results/clientpositive/vector_case_when_1.q.out +++ ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -516,7 +516,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 40, 42, 45, 46] - selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 
8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 48)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 48:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 49:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 49:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 19:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 34:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 35:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 35:boolean) -> 36:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 37:boolean, col 38:timestampcol 39:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 37:boolean, CastDateToTimestamp(col 12:date) -> 38:timestamp, CastDateToTimestamp(col 11:date) -> 39:timestamp) -> 40:timestamp, IfExprColumnNull(col 37:boolean, col 41:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 37:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 41:int) -> 42:int, IfExprNullColumn(col 43:boolean, null, col 44)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 43:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 44:int) -> 45:int, IfExprLongScalarLongScalar(col 47:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 46:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 46:int) -> 47:boolean) -> 46:date + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val 
Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 48)(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 17:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 48:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 49:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 18:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 49:decimal(10,2)) -> 31:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 19:boolean) -> 33:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 19:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 19:boolean) -> 34:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 35:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 35:boolean) -> 36:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 37:boolean, col 38:timestampcol 39:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 37:boolean, CastDateToTimestamp(col 12:date) -> 38:timestamp, CastDateToTimestamp(col 11:date) -> 39:timestamp) -> 40:timestamp, IfExprColumnNull(col 37:boolean, col 41:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 37:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 41:int) -> 42:int, IfExprNullColumn(col 43:boolean, null, col 44)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 43:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 44:int) -> 45:int, IfExprLongScalarLongScalar(col 47:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 46:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 46:int) -> 47:boolean) -> 46:date
                    Statistics: Num rows: 101 Data size: 78500 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
@@ -844,8 +844,8 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 71, 75, 78, 81, 82]
-                    selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 62:boolean, null, col 84)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 62:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 84:decimal(10,2)) -> 63:decimal(10,2), IfExprColumnNull(col 64:boolean, col 85:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 64:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 85:decimal(10,2)) -> 65:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 67:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 68:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 66:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 61:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 66:boolean) -> 69:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 70:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualStringScalar(col 61:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 61:string) -> 70:boolean) -> 71:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 72:boolean, col 73:timestampcol 74:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 72:boolean, CastDateToTimestamp(col 12:date) -> 73:timestamp, CastDateToTimestamp(col 11:date) -> 74:timestamp) -> 75:timestamp, IfExprCondExprNull(col 76:boolean, col 77:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 76:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 77:int) -> 78:int, IfExprNullCondExpr(col 79:boolean, null, col 80:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 79:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 80:int) -> 81:int, IfExprLongScalarLongScalar(col 83:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 82:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 82:int) -> 83:boolean) -> 82:date
+                    projectedOutputColumnNums: [4, 27, 38, 48, 52, 54, 60, 62, 64, 66, 67, 68, 70, 74, 77, 80, 81]
+                    selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 23:boolean, col 28:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 23:boolean, ConstantVectorExpression(val Single) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 36:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 29:boolean, ConstantVectorExpression(val Two) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 35:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 31:boolean, ConstantVectorExpression(val Some) -> 32:string, IfExprColumnNull(col 33:boolean, col 34:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 33:boolean, ConstantVectorExpression(val Many) -> 34:string) -> 35:string) -> 36:string) -> 37:string) -> 38:string, IfExprColumnCondExpr(col 39:boolean, col 40:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 39:boolean, ConstantVectorExpression(val Single) -> 40:string, IfExprColumnCondExpr(col 41:boolean, col 42:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 41:boolean, ConstantVectorExpression(val Two) -> 42:string, IfExprColumnCondExpr(col 43:boolean, col 44:stringcol 45:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 43:boolean, ConstantVectorExpression(val Some) -> 44:string, IfExprNullNull(null, null) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprDoubleColumnLongScalar(col 57:boolean, col 58:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 58:double) -> 54:double, IfExprCondExprColumn(col 57:boolean, col 59:double, col 58:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 57:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 58:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 58:double) -> 59:double, ConstantVectorExpression(val 0.0) -> 58:double) -> 60:double, IfExprNullColumn(col 61:boolean, null, col 83)(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 61:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 83:decimal(10,2)) -> 62:decimal(10,2), IfExprColumnNull(col 63:boolean, col 84:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 63:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 84:decimal(10,2)) -> 64:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 65:boolean) -> 66:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 65:boolean) -> 67:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 65:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 13:string, val DELIVER IN PERSON)(children: col 13:varchar(20)) -> 65:boolean) -> 68:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 69:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualStringScalar(col 13:string, val TAKE BACK RETURN)(children: col 13:varchar(20)) -> 69:boolean) -> 70:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 71:boolean, col 72:timestampcol 73:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 71:boolean, CastDateToTimestamp(col 12:date) -> 72:timestamp, CastDateToTimestamp(col 11:date) -> 73:timestamp) -> 74:timestamp, IfExprCondExprNull(col 75:boolean, col 76:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 75:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 76:int) -> 77:int, IfExprNullCondExpr(col 78:boolean, null, col 79:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 78:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 79:int) -> 80:int, IfExprLongScalarLongScalar(col 82:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 81:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 81:int) -> 82:boolean) -> 81:date
                    Statistics: Num rows: 101 Data size: 78500 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
@@ -872,7 +872,7 @@ STAGE PLANS:
              includeColumns: [1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14]
              dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2)/DECIMAL_64, l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string
              partitionColumnCount: 0
-              scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, string, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2)/DECIMAL_64, bigint, decimal(10,2)/DECIMAL_64, bigint, timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, decimal(10,2), decimal(10,2)]
+              scratchColumnTypeNames: [bigint, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, string, bigint, string, bigint, string, string, string, string, string, bigint, bigint, bigint, bigint, bigint, double, double, bigint, bigint, double, double, double, bigint, decimal(10,2), bigint, decimal(10,2), bigint, decimal(12,2), decimal(12,2), decimal(10,2)/DECIMAL_64, bigint, decimal(10,2)/DECIMAL_64, bigint, timestamp, timestamp, timestamp, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, decimal(10,2), decimal(10,2)]

  Stage: Stage-0
    Fetch Operator
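The IfExpr* entries in the plans above are the vectorized forms of SQL IF/CASE branches. As a rough scalar model of what one of them computes, consider IfExprDoubleColumnLongScalar; this sketch is only an illustration of the semantics, not Hive's implementation, which operates on a VectorizedRowBatch and additionally tracks null vectors and the batch's selected-row mapping:

```java
public class IfExprScalarModel {
  // Simplified model of IfExprDoubleColumnLongScalar from the plan above:
  // out[i] = cond[i] ? thenCol[i] : elseScalar
  static void ifExprDoubleColumnLongScalar(boolean[] cond, double[] thenCol,
      long elseScalar, double[] out, int n) {
    for (int i = 0; i < n; i++) {
      out[i] = cond[i] ? thenCol[i] : (double) elseScalar;
    }
  }

  public static void main(String[] args) {
    boolean[] cond = { true, false };
    double[] thenCol = { 1.5, 2.5 };
    double[] out = new double[2];
    ifExprDoubleColumnLongScalar(cond, thenCol, 0L, out, 2);
    System.out.println(out[0] + ", " + out[1]); // 1.5, 0.0
  }
}
```

The CondExpr variants that appear in the new plans (IfExprColumnCondExpr, IfExprCondExprCondExpr, and so on) go one step further: each branch expression is evaluated only over the rows that actually take that branch, rather than over the whole batch.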
diff --git ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
index 260c159..934a1e7 100644
--- ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
+++ ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
@@ -465,7 +465,7 @@ STAGE PLANS:
                     0 CAST( _col1 AS STRING) (type: string)
                     1 _col1 (type: string)
                   Map Join Vectorization:
-                      bigTableKeyExpressions: CastStringGroupToString(col 1:char(10)) -> 3:string
+                      bigTableKeyExpressions: col 1:char(10)
                       bigTableValueExpressions: col 0:int, col 1:char(10)
                       className: VectorMapJoinOperator
                       native: false
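The hunk above shows the user-visible effect of the cast change: the big-table join key over a char(10) column is now referenced in place (col 1:char(10)) instead of being copied through CastStringGroupToString into a scratch string column. A minimal sketch of why the pass-through is byte-compatible, assuming the storage-api BytesColumnVector is on the classpath and that char values are stored with their blank padding trimmed (the class name and values here are hypothetical):

```java
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

public class CharKeyPassThrough {
  public static void main(String[] args) {
    // A char(10) column: 'SHIP      ' is stored with its padding trimmed.
    BytesColumnVector charCol = new BytesColumnVector(1024);
    charCol.initBuffer();
    byte[] trimmed = "SHIP".getBytes(StandardCharsets.UTF_8);
    charCol.setVal(0, trimmed, 0, trimmed.length);

    // The stored bytes already match the STRING form of the key, so the join
    // can read the column in place -- no CastStringGroupToString copy needed.
    String key = new String(charCol.vector[0], charCol.start[0],
        charCol.length[0], StandardCharsets.UTF_8);
    System.out.println("key = '" + key + "'"); // key = 'SHIP'
  }
}
```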
diff --git ql/src/test/results/clientpositive/vectorized_casts.q.out ql/src/test/results/clientpositive/vectorized_casts.q.out
index cf77aee..cfe2391 100644
--- ql/src/test/results/clientpositive/vectorized_casts.q.out
+++ ql/src/test/results/clientpositive/vectorized_casts.q.out
@@ -180,8 +180,8 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 67, 68, 69, 70, 66, 73]
-                    selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, CastStringToTimestamp(col 6:string) -> 56:timestamp, CastStringToTimestamp(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, CastTimestampToString(col 8:timestamp) -> 65:string, CastStringGroupToString(col 66:char(10))(children: CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10)) -> 67:string, CastStringGroupToString(col 66:varchar(10))(children: CastStringGroupToVarChar(col 6:string, maxLength 10) -> 66:varchar(10)) -> 68:string, CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 69:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 70:double, CastDoubleToString(col 71:double)(children: FuncSinDoubleToDouble(col 4:float) -> 71:double) -> 66:string, DoubleColAddDoubleColumn(col 71:double, col 72:double)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 71:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double
+                    projectedOutputColumnNums: [13, 14, 15, 16, 17, 18, 10, 20, 19, 21, 0, 1, 2, 3, 22, 23, 10, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 4, 5, 35, 36, 37, 38, 39, 5, 41, 43, 45, 47, 48, 49, 51, 54, 55, 8, 56, 57, 26, 58, 59, 60, 61, 62, 63, 64, 65, 6, 66, 67, 68, 69, 71, 73]
+                    selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 13:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 15:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 16:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 18:boolean, CastLongToBooleanViaLongToLong(col 19:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 19:bigint) -> 20:boolean, CastTimestampToBoolean(col 8:timestamp) -> 19:boolean, CastStringToBoolean(col 6) -> 21:boolean, CastDoubleToLong(col 4:float) -> 22:int, CastDoubleToLong(col 5:double) -> 23:int, CastTimestampToLong(col 8:timestamp) -> 24:int, CastStringToLong(col 6:string) -> 25:int, CastStringToLong(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 27:int, CastDoubleToLong(col 4:float) -> 28:tinyint, CastDoubleToLong(col 4:float) -> 29:smallint, CastDoubleToLong(col 4:float) -> 30:bigint, CastLongToDouble(col 0:tinyint) -> 31:double, CastLongToDouble(col 1:smallint) -> 32:double, CastLongToDouble(col 2:int) -> 33:double, CastLongToDouble(col 3:bigint) -> 34:double, CastLongToDouble(col 10:boolean) -> 35:double, CastTimestampToDouble(col 8:timestamp) -> 36:double, CastStringToDouble(col 6:string) -> 37:double, CastStringToDouble(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 38:double, CastLongToFloatViaLongToDouble(col 2:int) -> 39:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 41:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 43:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 45:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 47:timestamp, CastDoubleToTimestamp(col 4:float) -> 48:timestamp, CastDoubleToTimestamp(col 5:double) -> 49:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 51:timestamp, CastMillisecondsLongToTimestamp(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 54:timestamp, CastDateToTimestamp(col 52:date)(children: CastTimestampToDate(col 8:timestamp) -> 52:date) -> 55:timestamp, CastStringToTimestamp(col 6:string) -> 56:timestamp, CastStringToTimestamp(col 26:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 26:string) -> 57:timestamp, CastLongToString(col 0:tinyint) -> 26:string, CastLongToString(col 1:smallint) -> 58:string, CastLongToString(col 2:int) -> 59:string, CastLongToString(col 3:bigint) -> 60:string, CastFloatToString(col 4:float) -> 61:string, CastDoubleToString(col 5:double) -> 62:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 63:string, CastLongToString(col 52:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 52:bigint) -> 64:string, CastTimestampToString(col 8:timestamp) -> 65:string, CastStringGroupToChar(col 6:string, maxLength 10) -> 66:char(10), CastStringGroupToVarChar(col 6:string, maxLength 10) -> 67:varchar(10), CastLongToFloatViaLongToDouble(col 52:int)(children: CastDoubleToLong(col 4:float) -> 52:int) -> 68:float, CastLongToDouble(col 52:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 52:int) -> 69:double, CastDoubleToString(col 70:double)(children: FuncSinDoubleToDouble(col 4:float) -> 70:double) -> 71:string, DoubleColAddDoubleColumn(col 70:double, col 72:double)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 70:float, CastLongToDouble(col 10:boolean) -> 72:double) -> 73:double
                    Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
@@ -208,7 +208,7 @@ STAGE PLANS:
              includeColumns: [0, 1, 2, 3, 4, 5, 6, 8, 10]
              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
              partitionColumnCount: 0
-              scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, string, bigint, bigint, bigint, bigint, double, double, double, double, double, double, double, double, double, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, bigint, timestamp, timestamp, timestamp, timestamp, timestamp, string, string, string, string, string, string, string, string, string, string, string, double, double, double, double, double]
+              scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, string, bigint, bigint, bigint, bigint, double, double, double, double, double, double, double, double, double, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, timestamp, bigint, timestamp, timestamp, timestamp, timestamp, timestamp, string, string, string, string, string, string, string, string, string, string, double, double, double, string, double, double]

  Stage: Stage-0
    Fetch Operator
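In the new vectorized_casts plan, cast(cstring1 as char(10)) and cast(cstring1 as varchar(10)) are projected straight from CastStringGroupToChar and CastStringGroupToVarChar; the old plan re-wrapped each result in CastStringGroupToString, costing an extra scratch string column per cast. A small illustration of why the outer cast adds nothing, using the HiveChar and HiveVarchar value types (illustrative only; the vectorized path works on column vectors, not these objects):

```java
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;

public class RedundantStringCast {
  public static void main(String[] args) {
    // cast('ship' as char(10)) -- char semantics pad to the declared length...
    HiveChar ch = new HiveChar("ship", 10);
    // ...but the string form strips the padding again, so
    // cast(cast(s as char(10)) as string) round-trips to the same bytes.
    System.out.println("'" + ch.getStrippedValue() + "'"); // 'ship'

    // varchar never pads, so the outer cast to string is a pure identity.
    HiveVarchar vc = new HiveVarchar("ship", 10);
    System.out.println("'" + vc.getValue() + "'");         // 'ship'
  }
}
```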
diff --git serde/src/java/org/apache/hadoop/hive/serde2/RandomTypeUtil.java serde/src/java/org/apache/hadoop/hive/serde2/RandomTypeUtil.java
index 9360509..3720b68 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/RandomTypeUtil.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/RandomTypeUtil.java
@@ -57,6 +57,35 @@ public static String getRandString(Random r, String characters, int length) {
     return bytes;
   }
 
+  public static String getRandUnicodeString(Random r) {
+    return getRandUnicodeString(r, r.nextInt(10));
+  }
+
+  // Skip lower ASCII to avoid punctuation that might mess up serialization, etc...
+  private static int MIN_RANDOM_CODEPOINT = 256;
+  private static int RANGE_RANDOM_CODEPOINT = Character.MAX_CODE_POINT + 1 - MIN_RANDOM_CODEPOINT;
+
+  public static String getRandUnicodeString(Random r, int length) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < length; i++) {
+      char ch;
+      while (true) {
+        int codePoint = MIN_RANDOM_CODEPOINT + r.nextInt(RANGE_RANDOM_CODEPOINT);
+        if (!Character.isDefined(codePoint) ||
+            Character.getType(codePoint) == Character.PRIVATE_USE) {
+          continue;
+        }
+        ch = (char) codePoint;
+        if (Character.isSurrogate(ch)) {
+          continue;
+        }
+        break;
+      }
+      sb.append(ch);
+    }
+    return sb.toString();
+  }
+
   private static final String DECIMAL_CHARS = "0123456789";
 
   public static HiveDecimal getRandHiveDecimal(Random r) {
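One caveat on the helper added above: it rejection-samples defined, non-private-use code points at or above 256, but then narrows each accepted code point with a (char) cast, so anything above U+FFFF is truncated to its low 16 bits and the result is always a Basic Multilingual Plane character (possibly one the earlier isDefined check never saw). A variant that keeps supplementary characters intact would append the code point itself; this is a self-contained sketch of that alternative, not the patch's code:

```java
import java.util.Random;

public class RandUnicodeSketch {
  private static final int MIN_CP = 256; // skip lower ASCII, as in the patch
  private static final int RANGE_CP = Character.MAX_CODE_POINT + 1 - MIN_CP;

  // Note: length counts code points here; the patch's version counts chars.
  static String randUnicodeString(Random r, int length) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < length; i++) {
      int cp;
      do {
        cp = MIN_CP + r.nextInt(RANGE_CP);
      } while (!Character.isDefined(cp)
          || Character.getType(cp) == Character.PRIVATE_USE
          || Character.getType(cp) == Character.SURROGATE);
      sb.appendCodePoint(cp); // emits a surrogate pair when cp > 0xFFFF
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(randUnicodeString(new Random(42L), 8));
  }
}
```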