diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 5b702bb..4a91a7c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -49,8 +49,51 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode; -import org.apache.hadoop.hive.ql.exec.vector.expressions.*; -import org.apache.hadoop.hive.ql.exec.vector.AggregateDefinition; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToCharViaLongToChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToVarCharViaLongToVarChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToVarChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToVarChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToVarChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringGroupToChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringGroupToVarChar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDecimalColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStructColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IDecimalInExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IDoubleInExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ILongInExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IStructInExpr; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLength; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StructColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.TruncStringOutput; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorElt; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount; @@ -81,7 +124,23 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToTimestampViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastMillisecondsLongToTimestampViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToDoubleViaLongToDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -104,21 +163,54 @@ import org.apache.hadoop.hive.ql.udf.UDFToLong; import org.apache.hadoop.hive.ql.udf.UDFToShort; import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.ql.udf.generic.*; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; 
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPositive; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFRound; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToIntervalDayTime; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToIntervalYearMonth; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.DateUtils; + /** * Context class for vectorization execution. 
 * Main role is to map column names to column indices and serves as a
@@ -1276,17 +1368,200 @@ private VectorExpression getEltExpression(List<ExprNodeDesc> childExpr, TypeInfo
     }
   }
 
+  public enum InConstantType {
+    INT_FAMILY,
+    TIMESTAMP,
+    DATE,
+    FLOAT_FAMILY,
+    STRING_FAMILY,
+    DECIMAL
+  }
+
+  public static InConstantType getInConstantTypeFromPrimitiveCategory(PrimitiveCategory primitiveCategory) {
+
+    switch (primitiveCategory) {
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      return InConstantType.INT_FAMILY;
+
+    case DATE:
+      return InConstantType.DATE;
+
+    case TIMESTAMP:
+      return InConstantType.TIMESTAMP;
+
+    case FLOAT:
+    case DOUBLE:
+      return InConstantType.FLOAT_FAMILY;
+
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+    case BINARY:
+      return InConstantType.STRING_FAMILY;
+
+    case DECIMAL:
+      return InConstantType.DECIMAL;
+
+    case INTERVAL_YEAR_MONTH:
+    case INTERVAL_DAY_TIME:
+      // UNDONE: Fall through for these... they don't appear to be supported yet.
+    default:
+      throw new RuntimeException("Unexpected primitive type category " + primitiveCategory);
+    }
+  }
+
+  private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr,
+      TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, Mode mode, TypeInfo returnType)
+          throws HiveException {
+
+    VectorExpression expr = null;
+
+    StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
+
+    ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+    final int fieldCount = fieldTypeInfos.size();
+    ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
+    InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
+    for (int f = 0; f < fieldCount; f++) {
+      TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
+      // Only primitive fields are supported for now.
+      if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
+        return null;
+      }
+
+      // We are going to serialize using the 4 basic types.
+      ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
+      fieldVectorColumnTypes[f] = fieldVectorColumnType;
+
+      // We currently evaluate the IN (..) constants in special ways.
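+      // (E.g. an INT constant arrives as an IntWritable and a BIGINT constant as
+      // a LongWritable; the serialization loop below widens both to a long, so
+      // all integer-family fields share one serialized key format.)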
+      PrimitiveCategory fieldPrimitiveCategory =
+          ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
+      InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
+      fieldInConstantTypes[f] = inConstantType;
+    }
+
+    Output buffer = new Output();
+    BinarySortableSerializeWrite binarySortableSerializeWrite =
+        new BinarySortableSerializeWrite(fieldCount);
+
+    final int inChildrenCount = inChildren.size();
+    byte[][] serializedInChildren = new byte[inChildrenCount][];
+    try {
+      for (int i = 0; i < inChildrenCount; i++) {
+        ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) inChildren.get(i);
+
+        StandardStructObjectInspector oi = (StandardStructObjectInspector) exprNode.getWritableObjectInspector();
+
+        ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprNode);
+        ObjectInspector output = evaluator.initialize(oi);
+        Object[] constants = (Object[]) evaluator.evaluate(null);
+
+        binarySortableSerializeWrite.set(buffer);
+        for (int f = 0; f < fieldCount; f++) {
+          Object constant = constants[f];
+          if (constant == null) {
+            binarySortableSerializeWrite.writeNull();
+          } else {
+            InConstantType inConstantType = fieldInConstantTypes[f];
+            switch (inConstantType) {
+            case STRING_FAMILY:
+              {
+                byte[] bytes;
+                if (constant instanceof Text) {
+                  Text text = (Text) constant;
+                  bytes = text.getBytes();
+                  binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
+                } else {
+                  throw new HiveException("Unexpected constant String type " +
+                      constant.getClass().getSimpleName());
+                }
+              }
+              break;
+
+            case INT_FAMILY:
+              {
+                long value;
+                if (constant instanceof IntWritable) {
+                  value = ((IntWritable) constant).get();
+                } else if (constant instanceof LongWritable) {
+                  value = ((LongWritable) constant).get();
+                } else {
+                  throw new HiveException("Unexpected constant Long type " +
+                      constant.getClass().getSimpleName());
+                }
+                binarySortableSerializeWrite.writeLong(value);
+              }
+              break;
+
+            case FLOAT_FAMILY:
+              {
+                double value;
+                if (constant instanceof DoubleWritable) {
+                  value = ((DoubleWritable) constant).get();
+                } else {
+                  throw new HiveException("Unexpected constant Double type " +
+                      constant.getClass().getSimpleName());
+                }
+                binarySortableSerializeWrite.writeDouble(value);
+              }
+              break;
+
+            // UNDONE...
+            case DATE:
+            case TIMESTAMP:
+            case DECIMAL:
+            default:
+              throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
+            }
+          }
+        }
+        serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
+      }
+    } catch (Exception e) {
+      throw new HiveException(e);
+    }
+
+    // Create a single child representing the scratch column where we will
+    // generate the serialized keys of the batch.
+    int scratchBytesCol = ocm.allocateOutputColumn("string");
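+    // The struct IN expression reuses the byte[] IN lookup of its string IN
+    // superclass: each row's fields are serialized into the scratch column and
+    // probed against the serialized constants built above.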
+    Class<?> cl = (mode == Mode.FILTER ?
+        FilterStructColumnInList.class : StructColumnInList.class);
+
+    expr = createVectorExpression(cl, null, Mode.PROJECTION, returnType);
+
+    ((IStringInExpr) expr).setInListValues(serializedInChildren);
+
+    ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
+    ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(),
+        fieldVectorColumnTypes);
+
+    return expr;
+  }
+
   /**
    * Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
    */
   private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType)
       throws HiveException {
     ExprNodeDesc colExpr = childExpr.get(0);
+    List<ExprNodeDesc> inChildren = childExpr.subList(1, childExpr.size());
 
     String colType = colExpr.getTypeString();
+    colType = VectorizationContext.mapTypeNameSynonyms(colType);
+    TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType);
+    Category category = colTypeInfo.getCategory();
+    if (category == Category.STRUCT) {
+      return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType);
+    } else if (category != Category.PRIMITIVE) {
+      return null;
+    }
 
     // prepare arguments for createVectorExpression
-    List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(childExpr.subList(1, childExpr.size()));
+    List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(inChildren);
 
     /* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
      * in the Vectorizer class.  If NULL is passed in as a list entry, behavior is not defined.
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
index 2434e90..e34ec75 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
@@ -20,16 +20,7 @@
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.udf.UDFLike;
-import org.apache.hadoop.io.Text;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 /**
  * Evaluate an IN filter on a batch for a vector of strings.
@@ -165,6 +156,10 @@ public String getOutputType() {
     return "boolean";
   }
 
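+  /**
+   * Redirect this IN filter to read another input column; FilterStructColumnInList
+   * uses this to point the lookup at the scratch column that holds each row's
+   * serialized struct key.
+   */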
+  public void setInputColumn(int inputCol) {
+    this.inputCol = inputCol;
+  }
+
   @Override
   public int getOutputColumn() {
     return -1;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
new file mode 100644
index 0000000..00f22bb
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+
+/**
+ * Evaluate an IN filter on a batch for a vector of structs.
+ * This is optimized so that no objects have to be created in
+ * the inner loop, and there is a hash table implemented
+ * with Cuckoo hashing that has fast lookup to do the IN test.
+ */
+public class FilterStructColumnInList extends FilterStringColumnInList implements IStructInExpr {
+  private static final long serialVersionUID = 1L;
+  private VectorExpression[] structExpressions;
+  private ColumnVector.Type[] fieldVectorColumnTypes;
+  private int[] structColumnMap;
+  private int scratchBytesColumn;
+
+  private transient Output buffer;
+  private transient BinarySortableSerializeWrite binarySortableSerializeWrite;
+
+  /**
+   * After construction you must call setInListValues() to add the values to the IN set
+   * (on the IStringInExpr interface).
+   *
+   * And, call setScratchBytesColumn() and setStructColumnExprs() on the
+   * IStructInExpr interface.
+   */
+  public FilterStructColumnInList() {
+    super(-1);
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    final int logicalSize = batch.size;
+    if (logicalSize == 0) {
+      return;
+    }
+
+    if (buffer == null) {
+      buffer = new Output();
+      binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
+    }
+
+    for (VectorExpression ve : structExpressions) {
+      ve.evaluate(batch);
+    }
+
+    BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
+
+    try {
+      boolean selectedInUse = batch.selectedInUse;
+      int[] selected = batch.selected;
+      for (int logical = 0; logical < logicalSize; logical++) {
+        int batchIndex = (selectedInUse ? selected[logical] : logical);
+
+        binarySortableSerializeWrite.set(buffer);
+        for (int f = 0; f < structColumnMap.length; f++) {
+          int fieldColumn = structColumnMap[f];
+          ColumnVector colVec = batch.cols[fieldColumn];
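+          // An isRepeating vector stores a single value for the whole batch at
+          // index 0, so read index 0 instead of batchIndex in that case.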
+          int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
+          if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
+            switch (fieldVectorColumnTypes[f]) {
+            case BYTES:
+              {
+                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
+                byte[] bytes = bytesColVec.vector[adjustedIndex];
+                int start = bytesColVec.start[adjustedIndex];
+                int length = bytesColVec.length[adjustedIndex];
+                binarySortableSerializeWrite.writeString(bytes, start, length);
+              }
+              break;
+
+            case LONG:
+              binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
+              break;
+
+            case DOUBLE:
+              binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
+              break;
+
+            case DECIMAL:
+              binarySortableSerializeWrite.writeHiveDecimal(
+                  ((DecimalColumnVector) colVec).vector[adjustedIndex].getHiveDecimal());
+              break;
+
+            default:
+              throw new RuntimeException("Unexpected vector column type " +
+                  fieldVectorColumnTypes[f].name());
+            }
+          } else {
+            binarySortableSerializeWrite.writeNull();
+          }
+        }
+        scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
+      }
+
+      // Now, take the serialized keys we just wrote into our scratch column and look them
+      // up in the IN list.
+      super.evaluate(batch);
+
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return -1;
+  }
+
+  @Override
+  public Descriptor getDescriptor() {
+
+    // This VectorExpression (IN) is a special case, so don't return a descriptor.
+    return null;
+  }
+
+  @Override
+  public void setScratchBytesColumn(int scratchBytesColumn) {
+
+    // Tell our super class FilterStringColumnInList it will be evaluating our scratch
+    // BytesColumnVector.
+    super.setInputColumn(scratchBytesColumn);
+    this.scratchBytesColumn = scratchBytesColumn;
+  }
+
+  @Override
+  public void setStructColumnExprs(VectorizationContext vContext,
+      List<ExprNodeDesc> structColumnExprs, ColumnVector.Type[] fieldVectorColumnTypes)
+          throws HiveException {
+
+    structExpressions = vContext.getVectorExpressions(structColumnExprs);
+    structColumnMap = new int[structExpressions.length];
+    for (int i = 0; i < structColumnMap.length; i++) {
+      VectorExpression ve = structExpressions[i];
+      structColumnMap[i] = ve.getOutputColumn();
+    }
+    this.fieldVectorColumnTypes = fieldVectorColumnTypes;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java
new file mode 100644
index 0000000..3b25255
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStructInExpr.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface IStructInExpr {
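+  // setScratchBytesColumn() supplies the scratch column that receives each row's
+  // serialized struct key; setStructColumnExprs() supplies the per-field
+  // expressions and their vector column types.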
+  void setScratchBytesColumn(int scratchBytesColumn);
+  void setStructColumnExprs(VectorizationContext vContext, List<ExprNodeDesc> structColumnExprs,
+      ColumnVector.Type[] fieldVectorColumnTypes) throws HiveException;
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
index 03833a2..b90e3c0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
@@ -140,6 +140,10 @@ public String getOutputType() {
     return "boolean";
   }
 
+  public void setInputColumn(int inputCol) {
+    this.inputCol = inputCol;
+  }
+
   @Override
   public int getOutputColumn() {
     return this.outputColumn;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
new file mode 100644
index 0000000..724497a
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+
+/**
+ * Evaluate an IN boolean expression (not a filter) on a batch for a vector of structs.
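+ * For example, for
+ *   select struct(id, lineid) IN (struct('one','1'), struct('seven','1')) from test_1
+ * the field expressions for id and lineid are evaluated, each row's pair is
+ * serialized into one key, and that key is looked up in the serialized IN set.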
+ * This is optimized so that no objects have to be created in
+ * the inner loop, and there is a hash table implemented
+ * with Cuckoo hashing that has fast lookup to do the IN test.
+ */
+public class StructColumnInList extends StringColumnInList implements IStructInExpr {
+  private static final long serialVersionUID = 1L;
+  private VectorExpression[] structExpressions;
+  private ColumnVector.Type[] fieldVectorColumnTypes;
+  private int[] structColumnMap;
+  private int scratchBytesColumn;
+
+  private transient Output buffer;
+  private transient BinarySortableSerializeWrite binarySortableSerializeWrite;
+
+  public StructColumnInList() {
+    super();
+  }
+
+  /**
+   * After construction you must call setInListValues() to add the values to the IN set.
+   */
+  public StructColumnInList(int outputColumn) {
+    super(-1, outputColumn);
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    final int logicalSize = batch.size;
+    if (logicalSize == 0) {
+      return;
+    }
+
+    if (buffer == null) {
+      buffer = new Output();
+      binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
+    }
+
+    for (VectorExpression ve : structExpressions) {
+      ve.evaluate(batch);
+    }
+
+    BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
+
+    try {
+      boolean selectedInUse = batch.selectedInUse;
+      int[] selected = batch.selected;
+      for (int logical = 0; logical < logicalSize; logical++) {
+        int batchIndex = (selectedInUse ? selected[logical] : logical);
+
+        binarySortableSerializeWrite.set(buffer);
+        for (int f = 0; f < structColumnMap.length; f++) {
+          int fieldColumn = structColumnMap[f];
+          ColumnVector colVec = batch.cols[fieldColumn];
+          int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
+          if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
+            switch (fieldVectorColumnTypes[f]) {
+            case BYTES:
+              {
+                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
+                byte[] bytes = bytesColVec.vector[adjustedIndex];
+                int start = bytesColVec.start[adjustedIndex];
+                int length = bytesColVec.length[adjustedIndex];
+                binarySortableSerializeWrite.writeString(bytes, start, length);
+              }
+              break;
+
+            case LONG:
+              binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
+              break;
+
+            case DOUBLE:
+              binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
+              break;
+
+            case DECIMAL:
+              binarySortableSerializeWrite.writeHiveDecimal(
+                  ((DecimalColumnVector) colVec).vector[adjustedIndex].getHiveDecimal());
+              break;
+
+            default:
+              throw new RuntimeException("Unexpected vector column type " +
+                  fieldVectorColumnTypes[f].name());
+            }
+          } else {
+            binarySortableSerializeWrite.writeNull();
+          }
+        }
+        scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
+      }
+
+      // Now, take the serialized keys we just wrote into our scratch column and look them
+      // up in the IN list.
+      super.evaluate(batch);
+
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+
+  @Override
+  public Descriptor getDescriptor() {
+
+    // This VectorExpression (IN) is a special case, so don't return a descriptor.
+    return null;
+  }
+
+  @Override
+  public void setScratchBytesColumn(int scratchBytesColumn) {
+
+    // Tell our super class StringColumnInList it will be evaluating our scratch
+    // BytesColumnVector.
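+    // evaluate() writes each row's serialized struct key into that same column
+    // before delegating to the string IN lookup.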
+    super.setInputColumn(scratchBytesColumn);
+    this.scratchBytesColumn = scratchBytesColumn;
+  }
+
+  @Override
+  public void setStructColumnExprs(VectorizationContext vContext,
+      List<ExprNodeDesc> structColumnExprs, ColumnVector.Type[] fieldVectorColumnTypes)
+          throws HiveException {
+
+    structExpressions = vContext.getVectorExpressions(structColumnExprs);
+    structColumnMap = new int[structExpressions.length];
+    for (int i = 0; i < structColumnMap.length; i++) {
+      VectorExpression ve = structExpressions[i];
+      structColumnMap[i] = ve.getOutputColumn();
+    }
+    this.fieldVectorColumnTypes = fieldVectorColumnTypes;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 82c3e50..7e9f10f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -54,10 +54,12 @@
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
@@ -139,8 +141,11 @@
 import org.apache.hadoop.hive.ql.udf.generic.*;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -578,7 +583,12 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       if (nonVectorizableChildOfGroupBy(op)) {
         return new Boolean(true);
       }
-      boolean ret = validateMapWorkOperator(op, mapWork, isTez);
+      boolean ret;
+      try {
+        ret = validateMapWorkOperator(op, mapWork, isTez);
+      } catch (Exception e) {
+        throw new SemanticException(e);
+      }
       if (!ret) {
         LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
         return new Boolean(false);
@@ -1261,18 +1271,65 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode)
       LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
       return false;
     }
+    boolean isInExpression = false;
     if (desc instanceof ExprNodeGenericFuncDesc) {
       ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
       boolean r = validateGenericUdf(d);
       if (!r) {
         return false;
       }
+      GenericUDF genericUDF = d.getGenericUDF();
+      isInExpression = (genericUDF instanceof GenericUDFIn);
     }
     if (desc.getChildren() != null) {
-      for (ExprNodeDesc d: desc.getChildren()) {
-        // Don't restrict child expressions for projection. Always use looser FILTER mode.
-        boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
-        if (!r) {
+      if (isInExpression &&
+          desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
+        boolean r = validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER);
+        if (!r) {
+          return false;
+        }
+      } else {
+        for (ExprNodeDesc d: desc.getChildren()) {
+          // Don't restrict child expressions for projection. Always use looser FILTER mode.
+          boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
+          if (!r) {
+            return false;
+          }
+        }
+      }
+    }
+    return true;
+  }
+
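+  /*
+   * A struct IN() expression is vectorizable only if every child (the column
+   * expression and each constant) is a struct whose fields are primitives of
+   * the families the key serialization supports.
+   */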
+  private boolean validateStructInExpression(ExprNodeDesc desc,
+      VectorExpressionDescriptor.Mode mode) {
+
+    for (ExprNodeDesc d: desc.getChildren()) {
+      TypeInfo typeInfo = d.getTypeInfo();
+      if (typeInfo.getCategory() != Category.STRUCT) {
+        return false;
+      }
+      StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+
+      ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+      ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+      final int fieldCount = fieldTypeInfos.size();
+      for (int f = 0; f < fieldCount; f++) {
+        TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
+        Category category = fieldTypeInfo.getCategory();
+        if (category != Category.PRIMITIVE) {
+          LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
+              " of type " + fieldTypeInfo.getTypeName());
+          return false;
+        }
+        PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
+        InConstantType inConstantType =
+            VectorizationContext.getInConstantTypeFromPrimitiveCategory(
+                fieldPrimitiveTypeInfo.getPrimitiveCategory());
+
+        // For now, limit the data types we support for Vectorized Struct IN().
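+        // Supported: the integer family (boolean, tinyint through bigint), the
+        // float family (float, double) and the string family (string, char,
+        // varchar, binary); date, timestamp and decimal struct fields keep the
+        // expression from being vectorized for now.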
+ if (inConstantType != InConstantType.INT_FAMILY && + inConstantType != InConstantType.FLOAT_FAMILY && + inConstantType != InConstantType.STRING_FAMILY) { + LOG.info("Cannot vectorize struct field " + fieldNames.get(f) + + " of type " + fieldTypeInfo.getTypeName()); return false; } } diff --git ql/src/test/queries/clientpositive/vector_struct_in.q ql/src/test/queries/clientpositive/vector_struct_in.q new file mode 100644 index 0000000..0e3a4ca --- /dev/null +++ ql/src/test/queries/clientpositive/vector_struct_in.q @@ -0,0 +1,247 @@ +set hive.cbo.enable=false; +set hive.tez.dynamic.partition.pruning=false; +set hive.vectorized.execution.enabled=true; +SET hive.auto.convert.join=true; + +-- SORT_QUERY_RESULTS + +-- 2 Strings +create table test_1 (`id` string, `lineid` string) stored as orc; + +insert into table test_1 values ('one','1'), ('seven','1'); + +explain +select * from test_1 where struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +); + +select * from test_1 where struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +); + +explain +select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) as b from test_1 ; + +select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) as b from test_1 ; + + +-- 2 Integers +create table test_2 (`id` int, `lineid` int) stored as orc; + +insert into table test_2 values (1,1), (7,1); + +explain +select * from test_2 where struct(`id`, `lineid`) +IN ( +struct(2,3), +struct(3,1), +struct(1,1), +struct(5,2), +struct(6,1), +struct(8,1), +struct(7,1), +struct(9,1), +struct(10,1) +); + +select * from test_2 where struct(`id`, `lineid`) +IN ( +struct(2,3), +struct(3,1), +struct(1,1), +struct(5,2), +struct(6,1), +struct(8,1), +struct(7,1), +struct(9,1), +struct(10,1) +); + +explain +select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct(2,3), +struct(3,1), +struct(1,1), +struct(5,2), +struct(6,1), +struct(8,1), +struct(7,1), +struct(9,1), +struct(10,1) +) as b from test_2; + +select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct(2,3), +struct(3,1), +struct(1,1), +struct(5,2), +struct(6,1), +struct(8,1), +struct(7,1), +struct(9,1), +struct(10,1) +) as b from test_2; + +-- 1 String and 1 Integer +create table test_3 (`id` string, `lineid` int) stored as orc; + +insert into table test_3 values ('one',1), ('seven',1); + +explain +select * from test_3 where struct(`id`, `lineid`) +IN ( +struct('two',3), +struct('three',1), +struct('one',1), +struct('five',2), +struct('six',1), +struct('eight',1), +struct('seven',1), +struct('nine',1), +struct('ten',1) +); + +select * from test_3 where struct(`id`, `lineid`) +IN ( +struct('two',3), +struct('three',1), +struct('one',1), +struct('five',2), +struct('six',1), +struct('eight',1), +struct('seven',1), +struct('nine',1), +struct('ten',1) +); + +explain +select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct('two',3), 
+struct('three',1), +struct('one',1), +struct('five',2), +struct('six',1), +struct('eight',1), +struct('seven',1), +struct('nine',1), +struct('ten',1) +) as b from test_3; + +select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct('two',3), +struct('three',1), +struct('one',1), +struct('five',2), +struct('six',1), +struct('eight',1), +struct('seven',1), +struct('nine',1), +struct('ten',1) +) as b from test_3; + +-- 1 Integer and 1 String and 1 Double +create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc; + +insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5); + +explain +select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) +IN ( +struct(1L, "a", 1.5), +struct(1L, "b", -0.5), +struct(3L, "b", 1.5), +struct(1L, "d", 1.5), +struct(1L, "c", 1.5), +struct(1L, "b", 2.5), +struct(1L, "b", 0.5), +struct(5L, "b", 1.5), +struct(1L, "a", 0.5), +struct(3L, "b", 1.5) +); + +select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`) +IN ( +struct(1L, "a", 1.5), +struct(1L, "b", -0.5), +struct(3L, "b", 1.5), +struct(1L, "d", 1.5), +struct(1L, "c", 1.5), +struct(1L, "b", 2.5), +struct(1L, "b", 0.5), +struct(5L, "b", 1.5), +struct(1L, "a", 0.5), +struct(3L, "b", 1.5) +); + +explain +select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) +IN ( +struct(1L, "a", 1.5), +struct(1L, "b", -0.5), +struct(3L, "b", 1.5), +struct(1L, "d", 1.5), +struct(1L, "c", 1.5), +struct(1L, "b", 2.5), +struct(1L, "b", 0.5), +struct(5L, "b", 1.5), +struct(1L, "a", 0.5), +struct(3L, "b", 1.5) +) as b from test_4; + +select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`) +IN ( +struct(1L, "a", 1.5), +struct(1L, "b", -0.5), +struct(3L, "b", 1.5), +struct(1L, "d", 1.5), +struct(1L, "c", 1.5), +struct(1L, "b", 2.5), +struct(1L, "b", 0.5), +struct(5L, "b", 1.5), +struct(1L, "a", 0.5), +struct(3L, "b", 1.5) +) as b from test_4; \ No newline at end of file diff --git ql/src/test/results/clientpositive/vector_struct_in.q.out ql/src/test/results/clientpositive/vector_struct_in.q.out new file mode 100644 index 0000000..2528882 --- /dev/null +++ ql/src/test/results/clientpositive/vector_struct_in.q.out @@ -0,0 +1,825 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- 2 Strings +create table test_1 (`id` string, `lineid` string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- 2 Strings +create table test_1 (`id` string, `lineid` string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: insert into table test_1 values ('one','1'), ('seven','1') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@test_1 +POSTHOOK: query: insert into table test_1 values ('one','1'), ('seven','1') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test_1.lineid SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +select * from test_1 where struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), 
+struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from test_1 where struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct(id,lineid)) IN (struct('two','3'), struct('three','1'), struct('one','1'), struct('five','2'), struct('six','1'), struct('eight','1'), struct('seven','1'), struct('nine','1'), struct('ten','1')) (type: boolean) + Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), lineid (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test_1 where struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from test_1 where struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +one 1 +seven 1 +PREHOOK: query: explain +select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) as b from test_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) as b from test_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_1 + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), lineid (type: string), (struct(id,lineid)) IN (struct('two','3'), struct('three','1'), struct('one','1'), struct('five','2'), struct('six','1'), struct('eight','1'), struct('seven','1'), struct('nine','1'), struct('ten','1')) (type: boolean) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) as b from test_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +#### A masked pattern was here #### +POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`) +IN ( +struct('two','3'), +struct('three','1'), +struct('one','1'), +struct('five','2'), +struct('six','1'), +struct('eight','1'), +struct('seven','1'), +struct('nine','1'), +struct('ten','1') +) as b from test_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +#### A masked pattern was here #### +one 1 true +seven 1 true +PREHOOK: query: -- 2 Integers +create table test_2 (`id` int, `lineid` int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_2 +POSTHOOK: query: -- 2 Integers +create table test_2 (`id` int, `lineid` int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_2 +PREHOOK: query: insert into table test_2 values (1,1), (7,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@test_2 +POSTHOOK: query: insert into table test_2 values (1,1), (7,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@test_2 +POSTHOOK: Lineage: test_2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test_2.lineid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +select * from test_2 where struct(`id`, `lineid`) +IN ( +struct(2,3), +struct(3,1), +struct(1,1), +struct(5,2), +struct(6,1), +struct(8,1), +struct(7,1), +struct(9,1), +struct(10,1) +) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from test_2 where struct(`id`, `lineid`) +IN ( +struct(2,3), +struct(3,1), +struct(1,1), +struct(5,2), +struct(6,1), +struct(8,1), +struct(7,1), +struct(9,1), +struct(10,1) +) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct(id,lineid)) IN (struct(2,3), struct(3,1), struct(1,1), struct(5,2), struct(6,1), struct(8,1), struct(7,1), struct(9,1), struct(10,1)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), lineid (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_2 where struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1	1
+7	1
+PREHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test_2
+            Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: int), lineid (type: int), (struct(id,lineid)) IN (struct(2,3), struct(3,1), struct(1,1), struct(5,2), struct(6,1), struct(8,1), struct(7,1), struct(9,1), struct(10,1)) (type: boolean)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct(2,3),
+struct(3,1),
+struct(1,1),
+struct(5,2),
+struct(6,1),
+struct(8,1),
+struct(7,1),
+struct(9,1),
+struct(10,1)
+) as b from test_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1	1	true
+7	1	true
+PREHOOK: query: -- 1 String and 1 Integer
+create table test_3 (`id` string, `lineid` int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_3
+POSTHOOK: query: -- 1 String and 1 Integer
+create table test_3 (`id` string, `lineid` int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_3
+PREHOOK: query: insert into table test_3 values ('one',1), ('seven',1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@test_3
+POSTHOOK: query: insert into table test_3 values ('one',1), ('seven',1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@test_3
+POSTHOOK: Lineage: test_3.id SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_3.lineid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test_3
+            Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (struct(id,lineid)) IN (struct('two',3), struct('three',1), struct('one',1), struct('five',2), struct('six',1), struct('eight',1), struct('seven',1), struct('nine',1), struct('ten',1)) (type: boolean)
+              Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: id (type: string), lineid (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_3 where struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_3
+#### A masked pattern was here ####
+one	1
+seven	1
+PREHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test_3
+            Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: string), lineid (type: int), (struct(id,lineid)) IN (struct('two',3), struct('three',1), struct('one',1), struct('five',2), struct('six',1), struct('eight',1), struct('seven',1), struct('nine',1), struct('ten',1)) (type: boolean)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_3
+#### A masked pattern was here ####
+POSTHOOK: query: select `id`, `lineid`, struct(`id`, `lineid`)
+IN (
+struct('two',3),
+struct('three',1),
+struct('one',1),
+struct('five',2),
+struct('six',1),
+struct('eight',1),
+struct('seven',1),
+struct('nine',1),
+struct('ten',1)
+) as b from test_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_3
+#### A masked pattern was here ####
+one	1	true
+seven	1	true
+PREHOOK: query: -- 1 Integer and 1 String and 1 Double
+create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_4
+POSTHOOK: query: -- 1 Integer and 1 String and 1 Double
+create table test_4 (`my_bigint` bigint, `my_string` string, `my_double` double) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_4
+PREHOOK: query: insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@test_4
+POSTHOOK: query: insert into table test_4 values (1, "b", 1.5), (1, "a", 0.5), (2, "b", 1.5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@test_4
+POSTHOOK: Lineage: test_4.my_bigint EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test_4.my_double EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test_4.my_string SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test_4
+            Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (struct(my_bigint,my_string,my_double)) IN (struct(1,'a',1.5), struct(1,'b',-0.5), struct(3,'b',1.5), struct(1,'d',1.5), struct(1,'c',1.5), struct(1,'b',2.5), struct(1,'b',0.5), struct(5,'b',1.5), struct(1,'a',0.5), struct(3,'b',1.5)) (type: boolean)
+              Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_4 where struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_4
+#### A masked pattern was here ####
+1	a	0.5
+PREHOOK: query: explain
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test_4
+            Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double), (struct(my_bigint,my_string,my_double)) IN (struct(1,'a',1.5), struct(1,'b',-0.5), struct(3,'b',1.5), struct(1,'d',1.5), struct(1,'c',1.5), struct(1,'b',2.5), struct(1,'b',0.5), struct(5,'b',1.5), struct(1,'a',0.5), struct(3,'b',1.5)) (type: boolean)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_4
+#### A masked pattern was here ####
+POSTHOOK: query: select `my_bigint`, `my_string`, `my_double`, struct(`my_bigint`, `my_string`, `my_double`)
+IN (
+struct(1L, "a", 1.5),
+struct(1L, "b", -0.5),
+struct(3L, "b", 1.5),
+struct(1L, "d", 1.5),
+struct(1L, "c", 1.5),
+struct(1L, "b", 2.5),
+struct(1L, "b", 0.5),
+struct(5L, "b", 1.5),
+struct(1L, "a", 0.5),
+struct(3L, "b", 1.5)
+) as b from test_4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_4
+#### A masked pattern was here ####
+1	a	0.5	true
+1	b	1.5	false
+2	b	1.5	false