diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java index 02ddf08..66aa524 100644 --- common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java +++ common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java @@ -55,7 +55,7 @@ public String getStrippedValue() { return StringUtils.stripEnd(value, " "); } - protected String getPaddedValue() { + public String getPaddedValue() { return value; } diff --git common/src/java/org/apache/hive/common/util/DateUtils.java common/src/java/org/apache/hive/common/util/DateUtils.java index 454a6c8..c749bcb 100644 --- common/src/java/org/apache/hive/common/util/DateUtils.java +++ common/src/java/org/apache/hive/common/util/DateUtils.java @@ -65,5 +65,12 @@ public static void setIntervalDayTimeTotalNanos(HiveIntervalDayTime intervalDayT long totalNanos) { intervalDayTime.set(totalNanos / NANOS_PER_SEC, (int) (totalNanos % NANOS_PER_SEC)); } -} + public static long getIntervalDayTimeTotalSecondsFromTotalNanos(long totalNanos) { + return totalNanos / NANOS_PER_SEC; + } + + public static int getIntervalDayTimeNanosFromTotalNanos(long totalNanos) { + return (int) (totalNanos % NANOS_PER_SEC); + } +} diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 1502d80..f678d0f 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -181,9 +181,11 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ update_where_non_partitioned.q,\ update_where_partitioned.q,\ update_two_cols.q,\ + vector_aggregate_9.q,\ vector_between_in.q,\ vector_bucket.q,\ vector_cast_constant.q,\ + vector_char_2.q,\ vector_char_4.q,\ vector_char_mapjoin1.q,\ vector_char_simple.q,\ @@ -191,6 +193,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_coalesce_2.q,\ vector_count_distinct.q,\ vector_data_types.q,\ + vector_date_1.q,\ vector_decimal_1.q,\ vector_decimal_10_0.q,\ vector_decimal_2.q,\ @@ -204,6 +207,8 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_decimal_mapjoin.q,\ vector_decimal_math_funcs.q,\ vector_decimal_precision.q,\ + vector_decimal_round.q,\ + vector_decimal_round_2.q,\ vector_decimal_trailing.q,\ vector_decimal_udf.q,\ vector_decimal_udf2.q,\ @@ -211,8 +216,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_elt.q,\ vector_groupby_3.q,\ vector_groupby_reduce.q,\ + vector_if_expr.q,\ + vector_interval_1.q,\ + vector_interval_2.q,\ vector_left_outer_join.q,\ vector_mapjoin_reduce.q,\ + vector_multi_insert.q,\ vector_non_string_partition.q,\ vector_orderby_5.q,\ vector_partition_diff_num_cols.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 0cfae8b..6956393 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -465,9 +465,9 @@ private static BaseWork getBaseWork(Configuration conf, String name) { } } - public static Map> getMapWorkAllScratchColumnVectorTypeMaps(Configuration hiveConf) { + public static Map getMapWorkVectorScratchColumnTypeMap(Configuration hiveConf) { MapWork mapWork = getMapWork(hiveConf); - return mapWork.getAllScratchColumnVectorTypeMaps(); + return mapWork.getVectorScratchColumnTypeMap(); } public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) { diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index b89937d..545d7c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -232,7 +232,7 @@ private void initializeSourceForTag(ReduceWork redWork, int tag, ObjectInspector sources[tag] = new ReduceRecordSource(); sources[tag].init(jconf, redWork.getReducer(), redWork.getVectorMode(), keyTableDesc, valueTableDesc, reader, tag == bigTablePosition, (byte) tag, - redWork.getAllScratchColumnVectorTypeMaps()); + redWork.getVectorScratchColumnTypeMap()); ois[tag] = sources[tag].getObjectInspector(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index fcb959c..1236df5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -114,7 +114,7 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, KeyValuesReader reader, boolean handleGroupKey, byte tag, - Map> scratchColumnVectorTypes) + Map vectorScratchColumnTypeMap) throws Exception { ObjectInspector keyObjectInspector; @@ -180,10 +180,8 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT } rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois); - Map reduceShuffleScratchColumnTypeMap = - scratchColumnVectorTypes.get("_REDUCE_SHUFFLE_"); batchContext = new VectorizedRowBatchCtx(); - batchContext.init(reduceShuffleScratchColumnTypeMap, (StructObjectInspector) rowObjectInspector); + batchContext.init(vectorScratchColumnTypeMap, (StructObjectInspector) rowObjectInspector); batch = batchContext.createVectorizedRowBatch(); } else { ois.add(keyObjectInspector); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java new file mode 100644 index 0000000..809d7d4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -0,0 +1,596 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.sql.Timestamp; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; + +/** + * This class assigns specified columns of a row from a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * assign from the Writable row Object[]. + * + * This class is abstract to allow the subclasses to control batch reuse. + */ +public abstract class VectorAssignRow { + private static final long serialVersionUID = 1L; + private static final Log LOG = LogFactory.getLog(VectorAssignRow.class); + + protected abstract class Assigner { + protected int columnIndex; + + Assigner(int columnIndex) { + this.columnIndex = columnIndex; + } + + public int getColumnIndex() { + return columnIndex; + } + + abstract void setColumnVector(VectorizedRowBatch batch); + + abstract void forgetColumnVector(); + + abstract void assign(int batchIndex, Object object); + } + + private class VoidAssigner extends Assigner { + + VoidAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + } + + @Override + void forgetColumnVector() { + } + + @Override + void assign(int batchIndex, Object object) { + // This is no-op, there is no column to assign to and the object is expected to be null. 
+ assert (object == null); + } + } + + private abstract class AbstractLongAssigner extends Assigner { + + protected LongColumnVector colVector; + protected long[] vector; + + AbstractLongAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (LongColumnVector) batch.cols[columnIndex]; + vector = colVector.vector; + } + + @Override + void forgetColumnVector() { + colVector = null; + vector = null; + } + } + + protected class BooleanAssigner extends AbstractLongAssigner { + + BooleanAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + BooleanWritable bw = (BooleanWritable) object; + vector[batchIndex] = (bw.get() ? 1 : 0); + } + } + } + + protected class ByteAssigner extends AbstractLongAssigner { + + ByteAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + ByteWritable bw = (ByteWritable) object; + vector[batchIndex] = bw.get(); + } + } + } + + private class ShortAssigner extends AbstractLongAssigner { + + ShortAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + ShortWritable sw = (ShortWritable) object; + vector[batchIndex] = sw.get(); + } + } + } + + private class IntAssigner extends AbstractLongAssigner { + + IntAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + IntWritable iw = (IntWritable) object; + vector[batchIndex] = iw.get(); + } + } + } + + private class LongAssigner extends AbstractLongAssigner { + + LongAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + LongWritable lw = (LongWritable) object; + vector[batchIndex] = lw.get(); + } + } + } + + private class DateAssigner extends AbstractLongAssigner { + + DateAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + DateWritable bw = (DateWritable) object; + vector[batchIndex] = bw.getDays(); + } + } + } + + private class TimestampAssigner extends AbstractLongAssigner { + + TimestampAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + TimestampWritable tw = (TimestampWritable) object; + Timestamp t = tw.getTimestamp(); + vector[batchIndex] = TimestampUtils.getTimeNanoSec(t); + } + } + } + + private class IntervalYearMonthAssigner extends AbstractLongAssigner { + + IntervalYearMonthAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + 
HiveIntervalYearMonthWritable iymw = (HiveIntervalYearMonthWritable) object; + HiveIntervalYearMonth iym = iymw.getHiveIntervalYearMonth(); + vector[batchIndex] = iym.getTotalMonths(); + } + } + } + + private class IntervalDayTimeAssigner extends AbstractLongAssigner { + + IntervalDayTimeAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + HiveIntervalDayTimeWritable idtw = (HiveIntervalDayTimeWritable) object; + HiveIntervalDayTime idt = idtw.getHiveIntervalDayTime(); + vector[batchIndex] = DateUtils.getIntervalDayTimeTotalNanos(idt); + } + } + } + + private abstract class AbstractDoubleAssigner extends Assigner { + + protected DoubleColumnVector colVector; + protected double[] vector; + + AbstractDoubleAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (DoubleColumnVector) batch.cols[columnIndex]; + vector = colVector.vector; + } + + @Override + void forgetColumnVector() { + colVector = null; + vector = null; + } + } + + private class FloatAssigner extends AbstractDoubleAssigner { + + FloatAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + FloatWritable fw = (FloatWritable) object; + vector[batchIndex] = fw.get(); + } + } + } + + private class DoubleAssigner extends AbstractDoubleAssigner { + + DoubleAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + DoubleWritable dw = (DoubleWritable) object; + vector[batchIndex] = dw.get(); + } + } + } + + private abstract class AbstractBytesAssigner extends Assigner { + + protected BytesColumnVector colVector; + + AbstractBytesAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (BytesColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + } + + private class BinaryAssigner extends AbstractBytesAssigner { + + BinaryAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + BytesWritable bw = (BytesWritable) object; + colVector.setVal(batchIndex, bw.getBytes(), 0, bw.getLength()); + } + } + } + + private class StringAssigner extends AbstractBytesAssigner { + + StringAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + Text tw = (Text) object; + colVector.setVal(batchIndex, tw.getBytes(), 0, tw.getLength()); + } + } + } + + private class VarCharAssigner extends AbstractBytesAssigner { + + VarCharAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // We store VARCHAR type stripped of pads. 
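+        // The value can arrive either as a HiveVarchar or wrapped in a HiveVarcharWritable,
+        // so both forms are unwrapped below before the bytes are stored.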
+ HiveVarchar hiveVarchar; + if (object instanceof HiveVarchar) { + hiveVarchar = (HiveVarchar) object; + } else { + hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar(); + } + byte[] bytes = hiveVarchar.getValue().getBytes(); + colVector.setVal(batchIndex, bytes, 0, bytes.length); + } + } + } + + private class CharAssigner extends AbstractBytesAssigner { + + CharAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // We store CHAR type stripped of pads. + HiveChar hiveChar; + if (object instanceof HiveChar) { + hiveChar = (HiveChar) object; + } else { + hiveChar = ((HiveCharWritable) object).getHiveChar(); + } + + // We store CHAR in vector row batch with padding stripped. + byte[] bytes = hiveChar.getStrippedValue().getBytes(); + colVector.setVal(batchIndex, bytes, 0, bytes.length); + } + } + } + + private class DecimalAssigner extends Assigner { + + protected DecimalColumnVector colVector; + + DecimalAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (DecimalColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + if (object instanceof HiveDecimal) { + colVector.set(batchIndex, (HiveDecimal) object); + } else { + colVector.set(batchIndex, (HiveDecimalWritable) object); + } + } + } + } + + private Assigner createAssigner(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException { + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + Assigner assigner; + switch (primitiveCategory) { + case VOID: + assigner = new VoidAssigner(columnIndex); + break; + case BOOLEAN: + assigner = new BooleanAssigner(columnIndex); + break; + case BYTE: + assigner = new ByteAssigner(columnIndex); + break; + case SHORT: + assigner = new ShortAssigner(columnIndex); + break; + case INT: + assigner = new IntAssigner(columnIndex); + break; + case LONG: + assigner = new LongAssigner(columnIndex); + break; + case TIMESTAMP: + assigner = new TimestampAssigner(columnIndex); + break; + case DATE: + assigner = new DateAssigner(columnIndex); + break; + case FLOAT: + assigner = new FloatAssigner(columnIndex); + break; + case DOUBLE: + assigner = new DoubleAssigner(columnIndex); + break; + case BINARY: + assigner = new BinaryAssigner(columnIndex); + break; + case STRING: + assigner = new StringAssigner(columnIndex); + break; + case VARCHAR: + assigner = new VarCharAssigner(columnIndex); + break; + case CHAR: + assigner = new CharAssigner(columnIndex); + break; + case DECIMAL: + assigner = new DecimalAssigner(columnIndex); + break; + case INTERVAL_YEAR_MONTH: + assigner = new IntervalYearMonthAssigner(columnIndex); + break; + case INTERVAL_DAY_TIME: + assigner = new IntervalDayTimeAssigner(columnIndex); + break; + default: + throw new HiveException("No vector row assigner for primitive category " + + primitiveCategory); + } + return assigner; + } + + Assigner[] assigners; + + public void init(StructObjectInspector structObjectInspector, List projectedColumns) throws HiveException { + + List fields = structObjectInspector.getAllStructFieldRefs(); + assigners = new Assigner[fields.size()]; + + int i = 0; + for 
(StructField field : fields) { + int columnIndex = projectedColumns.get(i); + ObjectInspector fieldInspector = field.getFieldObjectInspector(); + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( + fieldInspector.getTypeName()); + assigners[i] = createAssigner(primitiveTypeInfo, columnIndex); + i++; + } + } + + public void init(List typeNames) throws HiveException { + + assigners = new Assigner[typeNames.size()]; + + int i = 0; + for (String typeName : typeNames) { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + assigners[i] = createAssigner(primitiveTypeInfo, i); + i++; + } + } + + protected void setBatch(VectorizedRowBatch batch) throws HiveException { + for (int i = 0; i < assigners.length; i++) { + Assigner assigner = assigners[i]; + int columnIndex = assigner.getColumnIndex(); + if (batch.cols[columnIndex] == null) { + throw new HiveException("Unexpected null vector column " + columnIndex); + } + assigner.setColumnVector(batch); + } + } + + protected void forgetBatch() { + for (Assigner assigner : assigners) { + assigner.forgetColumnVector(); + } + } + + public void assignRowColumn(int batchIndex, int logicalColumnIndex, Object object) { + assigners[logicalColumnIndex].assign(batchIndex, object); + } + + public void assignRow(int batchIndex, Object[] objects) { + int i = 0; + for (Assigner assigner : assigners) { + assigner.assign(batchIndex, objects[i++]); + } + } + +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java new file mode 100644 index 0000000..a696825 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * assign from the Writable row Object[]. + * + * This class is for use when the batch being assigned may change each time before processOp + * is called. 
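+ *
+ * Illustrative usage (a sketch only; the variable names are hypothetical):
+ *
+ *   VectorAssignRowDynBatch vectorAssignRow = new VectorAssignRowDynBatch();
+ *   vectorAssignRow.init(typeNames);            // hive type names, one per target column
+ *   vectorAssignRow.setBatchOnEntry(batch);     // bind the batch passed into processOp
+ *   vectorAssignRow.assignRow(batch.size, rowObjects);
+ *   batch.size++;
+ *   vectorAssignRow.forgetBatchOnExit();        // release the column vector references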
+ */ +public class VectorAssignRowDynBatch extends VectorAssignRow { + + public void setBatchOnEntry(VectorizedRowBatch batch) throws HiveException { + setBatch(batch); + } + + public void forgetBatchOnExit() { + forgetBatch(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java new file mode 100644 index 0000000..8c7c2ad --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * assign from the Writable row Object[]. + * + * This class is for use when the batch being assigned is always the same. + */ +public class VectorAssignRowSameBatch extends VectorAssignRow { + + public void setOneBatch(VectorizedRowBatch batch) throws HiveException { + setBatch(batch); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java new file mode 100644 index 0000000..1cba4f7 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping; + +/** + * This class collects column information for copying a row from one VectorizedRowBatch to + * same/another batch. 
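+ *
+ * Typical lifecycle (a sketch, not a prescription): call add(sourceColumn, outputColumn,
+ * typeName) once per column to be copied, then finalize() to freeze the mapping; after that
+ * getInputColumns(), getOutputColumns() and getTypeNames() return parallel arrays, for
+ * example for handing to VectorCopyRow.init.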
+ */ +public abstract class VectorColumnMapping { + + private static final long serialVersionUID = 1L; + + protected int[] sourceColumns; + protected int[] outputColumns; + protected String[] typeNames; + + protected VectorColumnOrderedMap vectorColumnMapping; + + public VectorColumnMapping() { + this.vectorColumnMapping = new VectorColumnOrderedMap(); + } + + public abstract void add(int sourceColumn, int outputColumn, String typeName); + + public abstract void finalize(); + + public int getCount() { + return sourceColumns.length; + } + + public int[] getInputColumns() { + return sourceColumns; + } + + public int[] getOutputColumns() { + return outputColumns; + } + + public String[] getTypeNames() { + return typeNames; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("source columns: " + Arrays.toString(sourceColumns)); + sb.append(", "); + sb.append("output columns: " + Arrays.toString(outputColumns)); + sb.append(", "); + sb.append("type names: " + Arrays.toString(typeNames)); + return sb.toString(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java new file mode 100644 index 0000000..96a4f83 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.ArrayList; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * This class collects column information for mapping vector columns, including the hive type name. + * + * The column information are kept ordered by a specified column. + * + * Call getMapping to collects the results into convenient arrays. 
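+ *
+ * For example (an illustrative sketch; the column numbers and types are made up):
+ *
+ *   VectorColumnOrderedMap map = new VectorColumnOrderedMap();
+ *   map.add(7, 2, "string");
+ *   map.add(4, 0, "bigint");
+ *   Mapping mapping = map.getMapping();
+ *   // getOrderedColumns() yields {4, 7} (sorted), getValueColumns() yields {0, 2},
+ *   // and getTypeNames() yields {"bigint", "string"}.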
+ */ +public class VectorColumnOrderedMap { + protected static transient final Log LOG = LogFactory.getLog(VectorColumnOrderedMap.class); + + private TreeMap orderedTreeMap; + + private class Value { + int valueColumn; + + String typeName; + + Value(int valueColumn, String typeName) { + this.valueColumn = valueColumn; + this.typeName = typeName; + } + } + + public class Mapping { + + private final int[] orderedColumns; + private final int[] valueColumns; + private final String[] typeNames; + + Mapping(int[] orderedColumns, int[] valueColumns, String[] typeNames) { + this.orderedColumns = orderedColumns; + this.valueColumns = valueColumns; + this.typeNames = typeNames; + } + + public int getCount() { + return orderedColumns.length; + } + + public int[] getOrderedColumns() { + return orderedColumns; + } + + public int[] getValueColumns() { + return valueColumns; + } + + public String[] getTypeNames() { + return typeNames; + } + } + + public VectorColumnOrderedMap() { + orderedTreeMap = new TreeMap(); + } + + public void add(int orderedColumn, int valueColumn, String typeName) { + if (orderedTreeMap.containsKey(orderedColumn)) { + throw new Error("Duplicate column " + orderedColumn + " in ordered column map"); + } + orderedTreeMap.put(orderedColumn, new Value(valueColumn, typeName)); + } + + public boolean orderedColumnsContain(int orderedColumn) { + return orderedTreeMap.containsKey(orderedColumn); + } + + public Mapping getMapping() { + ArrayList orderedColumns = new ArrayList(); + ArrayList valueColumns = new ArrayList(); + ArrayList typeNames = new ArrayList(); + for (Map.Entry entry : orderedTreeMap.entrySet()) { + orderedColumns.add(entry.getKey()); + Value value = entry.getValue(); + valueColumns.add(value.valueColumn); + typeNames.add(value.typeName); + } + return new Mapping( + ArrayUtils.toPrimitive(orderedColumns.toArray(new Integer[0])), + ArrayUtils.toPrimitive(valueColumns.toArray(new Integer[0])), + typeNames.toArray(new String[0])); + + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java new file mode 100644 index 0000000..491e8a4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping; + +/** + * This class collects column information for copying a row from one VectorizedRowBatch to + * same/another batch. + * + * In this variation, column information is ordered by the output column number. 
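+ *
+ * Illustrative sketch (the columns and types are made up):
+ *
+ *   VectorColumnOutputMapping mapping = new VectorColumnOutputMapping();
+ *   mapping.add(5, 1, "double");    // source column 5 feeds output column 1
+ *   mapping.add(2, 0, "string");
+ *   mapping.finalize();
+ *   vectorCopyRow.init(mapping);    // e.g. a VectorCopyRow instance, also added by this patch
+ *   vectorCopyRow.copyByValue(inBatch, inBatchIndex, outBatch, outBatchIndex);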
+ */ +public class VectorColumnOutputMapping extends VectorColumnMapping { + + private static final long serialVersionUID = 1L; + + @Override + public void add(int sourceColumn, int outputColumn, String typeName) { + // Order on outputColumn. + vectorColumnMapping.add(outputColumn, sourceColumn, typeName); + } + + public boolean containsOutputColumn(int outputColumn) { + return vectorColumnMapping.orderedColumnsContain(outputColumn); + } + + @Override + public void finalize() { + Mapping mapping = vectorColumnMapping.getMapping(); + + // Ordered columns are the output columns. + sourceColumns = mapping.getValueColumns(); + outputColumns = mapping.getOrderedColumns(); + typeNames = mapping.getTypeNames(); + + // Not needed anymore. + vectorColumnMapping = null; + } + +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java new file mode 100644 index 0000000..c1c53ba --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping; + +/** + * This class collects column information for copying a row from one VectorizedRowBatch to + * same/another batch. + * + * In this variation, column information is ordered by the source column number. + */ +public class VectorColumnSourceMapping extends VectorColumnMapping { + + private static final long serialVersionUID = 1L; + + @Override + public void add(int sourceColumn, int outputColumn, String typeName) { + // Order on sourceColumn. + vectorColumnMapping.add(sourceColumn, outputColumn, typeName); + } + + @Override + public void finalize() { + Mapping mapping = vectorColumnMapping.getMapping(); + + // Ordered columns are the source columns. + sourceColumns = mapping.getOrderedColumns(); + outputColumns = mapping.getValueColumns(); + typeNames = mapping.getTypeNames(); + + // Not needed anymore. 
+ vectorColumnMapping = null; + } + + public boolean isSourceSequenceGood() { + int count = sourceColumns.length; + for (int i = 0; i < count; i++) { + if (sourceColumns[i] != i) { + return false; + } + } + return true; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java new file mode 100644 index 0000000..e010e45 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -0,0 +1,246 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * This class copies specified columns of a row from one VectorizedRowBatch to another. + */ +public class VectorCopyRow { + + protected static transient final Log LOG = LogFactory.getLog(VectorCopyRow.class); + + private abstract class CopyRow { + protected int inColumnIndex; + protected int outColumnIndex; + + CopyRow(int inColumnIndex, int outColumnIndex) { + this.inColumnIndex = inColumnIndex; + this.outColumnIndex = outColumnIndex; + } + + abstract void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex); + } + + private class LongCopyRow extends CopyRow { + + LongCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + LongColumnVector inColVector = (LongColumnVector) inBatch.cols[inColumnIndex]; + LongColumnVector outColVector = (LongColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.vector[outBatchIndex] = inColVector.vector[0]; + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.vector[outBatchIndex] = inColVector.vector[inBatchIndex]; + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private class DoubleCopyRow extends CopyRow { + + DoubleCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + DoubleColumnVector inColVector = (DoubleColumnVector) inBatch.cols[inColumnIndex]; + DoubleColumnVector outColVector = (DoubleColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + 
outColVector.vector[outBatchIndex] = inColVector.vector[0]; + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.vector[outBatchIndex] = inColVector.vector[inBatchIndex]; + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private abstract class AbstractBytesCopyRow extends CopyRow { + + AbstractBytesCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + } + + private class BytesCopyRowByValue extends AbstractBytesCopyRow { + + BytesCopyRowByValue(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + BytesColumnVector inColVector = (BytesColumnVector) inBatch.cols[inColumnIndex]; + BytesColumnVector outColVector = (BytesColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.setVal(outBatchIndex, inColVector.vector[0], inColVector.start[0], inColVector.length[0]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.setVal(outBatchIndex, inColVector.vector[inBatchIndex], inColVector.start[inBatchIndex], inColVector.length[inBatchIndex]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private class BytesCopyRowByReference extends AbstractBytesCopyRow { + + BytesCopyRowByReference(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + BytesColumnVector inColVector = (BytesColumnVector) inBatch.cols[inColumnIndex]; + BytesColumnVector outColVector = (BytesColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.setRef(outBatchIndex, inColVector.vector[0], inColVector.start[0], inColVector.length[0]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.setRef(outBatchIndex, inColVector.vector[inBatchIndex], inColVector.start[inBatchIndex], inColVector.length[inBatchIndex]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private class DecimalCopyRow extends CopyRow { + + DecimalCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + DecimalColumnVector inColVector = (DecimalColumnVector) inBatch.cols[inColumnIndex]; + DecimalColumnVector outColVector = (DecimalColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.set(outBatchIndex, inColVector.vector[0]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.set(outBatchIndex, 
inColVector.vector[inBatchIndex]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private CopyRow[] subRowToBatchCopiersByValue; + private CopyRow[] subRowToBatchCopiersByReference; + + public void init(VectorColumnMapping columnMapping) { + int count = columnMapping.getCount(); + subRowToBatchCopiersByValue = new CopyRow[count]; + subRowToBatchCopiersByReference = new CopyRow[count]; + + for (int i = 0; i < count; i++) { + int inputColumn = columnMapping.getInputColumns()[i]; + int outputColumn = columnMapping.getOutputColumns()[i]; + String typeName = columnMapping.getTypeNames()[i]; + + CopyRow copyRowByValue = null; + CopyRow copyRowByReference = null; + + if (VectorizationContext.isIntFamily(typeName) || + VectorizationContext.isDatetimeFamily(typeName)) { + copyRowByValue = new LongCopyRow(inputColumn, outputColumn); + } else if (VectorizationContext.isFloatFamily(typeName)) { + copyRowByValue = new DoubleCopyRow(inputColumn, outputColumn); + } else if (VectorizationContext.isStringFamily(typeName)) { + copyRowByValue = new BytesCopyRowByValue(inputColumn, outputColumn); + copyRowByReference = new BytesCopyRowByReference(inputColumn, outputColumn); + } else if (VectorizationContext.decimalTypePattern.matcher(typeName).matches()){ + copyRowByValue = new DecimalCopyRow(inputColumn, outputColumn); + } else { + throw new Error("Cannot allocate vector copy row for " + typeName); + } + subRowToBatchCopiersByValue[i] = copyRowByValue; + if (copyRowByReference == null) { + subRowToBatchCopiersByReference[i] = copyRowByValue; + } else { + subRowToBatchCopiersByReference[i] = copyRowByReference; + } + } + } + + /* + * Use this copy method when the source batch may get reused before the target batch is finished. + * Any bytes column vector values will be copied to the target by value into the column's + * data buffer. + */ + public void copyByValue(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + for (CopyRow copyRow : subRowToBatchCopiersByValue) { + copyRow.copy(inBatch, inBatchIndex, outBatch, outBatchIndex); + } + } + + /* + * Use this copy method when the source batch is safe and will remain around until the target + * batch is finished. + * + * Any bytes column vector values will be referenced by the target column instead of copying. + */ + public void copyByReference(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + for (CopyRow copyRow : subRowToBatchCopiersByReference) { + copyRow.copy(inBatch, inBatchIndex, outBatch, outBatchIndex); + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java new file mode 100644 index 0000000..22106c6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -0,0 +1,711 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.EOFException; +import java.io.IOException; +import java.sql.Timestamp; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hive.common.util.DateUtils; + +/** + * This class deserializes a serialization format into a row of a VectorizedRowBatch. + * + * The caller provides the hive type names and output column numbers in the order desired to + * deserialize. + * + * This class uses an provided DeserializeRead object to directly deserialize by reading + * field-by-field from a serialization format into the primitive values of the VectorizedRowBatch. + */ + +public class VectorDeserializeRow { + private static final long serialVersionUID = 1L; + private static final Log LOG = LogFactory.getLog(VectorDeserializeRow.class); + + private DeserializeRead deserializeRead; + + private Reader[] readersByValue; + private Reader[] readersByReference; + private PrimitiveTypeInfo[] primitiveTypeInfos; + + public VectorDeserializeRow(DeserializeRead deserializeRead) { + this(); + this.deserializeRead = deserializeRead; + primitiveTypeInfos = deserializeRead.primitiveTypeInfos(); + + } + + // Not public since we must have the deserialize read object. 
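+  // In outline: each output column gets a small type-specific Reader (built in addReader below)
+  // that pulls one field from deserializeRead and writes it into the matching ColumnVector.
+  // The "by value" readers copy bytes into the batch's own buffers (setVal), while the
+  // "by reference" readers point the BytesColumnVector at the deserialization buffer (setRef).
+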
+ private VectorDeserializeRow() { + } + + private abstract class Reader { + protected int columnIndex; + + Reader(int columnIndex) { + this.columnIndex = columnIndex; + } + + abstract void apply(VectorizedRowBatch batch, int batchIndex) throws IOException; + } + + private abstract class AbstractLongReader extends Reader { + + AbstractLongReader(int columnIndex) { + super(columnIndex); + } + } + + private class BooleanReader extends AbstractLongReader { + + BooleanReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + boolean value = deserializeRead.readBoolean(); + colVector.vector[batchIndex] = (value ? 1 : 0); + } + } + } + + private class ByteReader extends AbstractLongReader { + + ByteReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + byte value = deserializeRead.readByte(); + colVector.vector[batchIndex] = (long) value; + } + } + } + + private class ShortReader extends AbstractLongReader { + + ShortReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + short value = deserializeRead.readShort(); + colVector.vector[batchIndex] = (long) value; + } + } + } + + private class IntReader extends AbstractLongReader { + + IntReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + int value = deserializeRead.readInt(); + colVector.vector[batchIndex] = (long) value; + } + } + } + + private class LongReader extends AbstractLongReader { + + LongReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + long value = deserializeRead.readLong(); + colVector.vector[batchIndex] = value; + } + } + } + + private class DateReader extends AbstractLongReader { + + DeserializeRead.ReadDateResults readDateResults; + + DateReader(int columnIndex) { + super(columnIndex); + readDateResults = deserializeRead.createReadDateResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readDate(readDateResults); + colVector.vector[batchIndex] = (long) 
readDateResults.getDays(); + } + } + } + + private class TimestampReader extends AbstractLongReader { + + DeserializeRead.ReadTimestampResults readTimestampResults; + + TimestampReader(int columnIndex) { + super(columnIndex); + readTimestampResults = deserializeRead.createReadTimestampResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readTimestamp(readTimestampResults); + Timestamp t = readTimestampResults.getTimestamp(); + colVector.vector[batchIndex] = TimestampUtils.getTimeNanoSec(t); + } + } + } + + private class IntervalYearMonthReader extends AbstractLongReader { + + DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults; + + IntervalYearMonthReader(int columnIndex) { + super(columnIndex); + readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); + HiveIntervalYearMonth hiym = readIntervalYearMonthResults.getHiveIntervalYearMonth(); + colVector.vector[batchIndex] = hiym.getTotalMonths(); + } + } + } + + private class IntervalDayTimeReader extends AbstractLongReader { + + DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults; + + IntervalDayTimeReader(int columnIndex) { + super(columnIndex); + readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); + HiveIntervalDayTime hidt = readIntervalDayTimeResults.getHiveIntervalDayTime(); + colVector.vector[batchIndex] = DateUtils.getIntervalDayTimeTotalNanos(hidt); + } + } + } + + private abstract class AbstractDoubleReader extends Reader { + + AbstractDoubleReader(int columnIndex) { + super(columnIndex); + } + } + + private class FloatReader extends AbstractDoubleReader { + + FloatReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + float value = deserializeRead.readFloat(); + colVector.vector[batchIndex] = (double) value; + } + } + } + + private class DoubleReader extends AbstractDoubleReader { + + DoubleReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + double value = deserializeRead.readDouble(); + 
colVector.vector[batchIndex] = value; + } + } + } + + private abstract class AbstractBytesReader extends Reader { + + AbstractBytesReader(int columnIndex) { + super(columnIndex); + } + } + + private class StringReaderByValue extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + StringReaderByValue(int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readString(readStringResults); + colVector.setVal(batchIndex, readStringResults.bytes, + readStringResults.start, readStringResults.length); + } + } + } + + private class StringReaderByReference extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + StringReaderByReference(int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readString(readStringResults); + colVector.setRef(batchIndex, readStringResults.bytes, + readStringResults.start, readStringResults.length); + } + } + } + + private class CharReaderByValue extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + private CharTypeInfo charTypeInfo; + + CharReaderByValue(CharTypeInfo charTypeInfo, int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + this.charTypeInfo = charTypeInfo; + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. 
+ deserializeRead.readString(readStringResults); + int adjustedLength = StringExpr.rightTrimAndTruncate(readStringResults.bytes, + readStringResults.start, readStringResults.length, charTypeInfo.getLength()); + colVector.setVal(batchIndex, readStringResults.bytes, readStringResults.start, adjustedLength); + } + } + } + + private class CharReaderByReference extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + private CharTypeInfo charTypeInfo; + + CharReaderByReference(CharTypeInfo charTypeInfo, int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + this.charTypeInfo = charTypeInfo; + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. + deserializeRead.readString(readStringResults); + int adjustedLength = StringExpr.rightTrimAndTruncate(readStringResults.bytes, + readStringResults.start, readStringResults.length, charTypeInfo.getLength()); + colVector.setRef(batchIndex, readStringResults.bytes, readStringResults.start, adjustedLength); + } + } + } + + private class VarcharReaderByValue extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + private VarcharTypeInfo varcharTypeInfo; + + VarcharReaderByValue(VarcharTypeInfo varcharTypeInfo, int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + this.varcharTypeInfo = varcharTypeInfo; + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. + deserializeRead.readString(readStringResults); + int adjustedLength = StringExpr.truncate(readStringResults.bytes, + readStringResults.start, readStringResults.length, varcharTypeInfo.getLength()); + colVector.setVal(batchIndex, readStringResults.bytes, readStringResults.start, adjustedLength); + } + } + } + + private class VarcharReaderByReference extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + private VarcharTypeInfo varcharTypeInfo; + + VarcharReaderByReference(VarcharTypeInfo varcharTypeInfo, int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + this.varcharTypeInfo = varcharTypeInfo; + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. 
+ deserializeRead.readString(readStringResults); + int adjustedLength = StringExpr.truncate(readStringResults.bytes, + readStringResults.start, readStringResults.length, varcharTypeInfo.getLength()); + colVector.setRef(batchIndex, readStringResults.bytes, readStringResults.start, adjustedLength); + } + } + } + + private class BinaryReaderByValue extends AbstractBytesReader { + + private DeserializeRead.ReadBinaryResults readBinaryResults; + + BinaryReaderByValue(int columnIndex) { + super(columnIndex); + readBinaryResults = deserializeRead.createReadBinaryResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readBinary(readBinaryResults); + colVector.setVal(batchIndex, readBinaryResults.bytes, + readBinaryResults.start, readBinaryResults.length); + } + } + } + + private class BinaryReaderByReference extends AbstractBytesReader { + + private DeserializeRead.ReadBinaryResults readBinaryResults; + + BinaryReaderByReference(int columnIndex) { + super(columnIndex); + readBinaryResults = deserializeRead.createReadBinaryResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readBinary(readBinaryResults); + colVector.setRef(batchIndex, readBinaryResults.bytes, + readBinaryResults.start, readBinaryResults.length); + } + } + } + + private class HiveDecimalReader extends Reader { + + private DeserializeRead.ReadDecimalResults readDecimalResults; + + HiveDecimalReader(int columnIndex) { + super(columnIndex); + readDecimalResults = deserializeRead.createReadDecimalResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readHiveDecimal(readDecimalResults); + HiveDecimal hiveDecimal = readDecimalResults.getHiveDecimal(); + colVector.vector[batchIndex].set(hiveDecimal); + } + } + } + + private void addReader(int index, int outputColumn) throws HiveException { + Reader readerByValue = null; + Reader readerByReference = null; + + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + // case VOID: + // UNDONE: + // break; + case BOOLEAN: + readerByValue = new BooleanReader(outputColumn); + break; + case BYTE: + readerByValue = new ByteReader(outputColumn); + break; + case SHORT: + readerByValue = new ShortReader(outputColumn); + break; + case INT: + readerByValue = new IntReader(outputColumn); + break; + case LONG: + readerByValue = new LongReader(outputColumn); + break; + case DATE: + readerByValue = new DateReader(outputColumn); + break; + case TIMESTAMP: + readerByValue = new TimestampReader(outputColumn); + break; + case FLOAT: + readerByValue = new FloatReader(outputColumn); + break; + case DOUBLE: + readerByValue = new DoubleReader(outputColumn); + break; + case STRING: + readerByValue = 
new StringReaderByValue(outputColumn); + readerByReference = new StringReaderByReference(outputColumn); + break; + case CHAR: + { + CharTypeInfo charTypeInfo = (CharTypeInfo) primitiveTypeInfo; + readerByValue = new CharReaderByValue(charTypeInfo, outputColumn); + readerByReference = new CharReaderByReference(charTypeInfo, outputColumn); + } + break; + case VARCHAR: + { + VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) primitiveTypeInfo; + readerByValue = new VarcharReaderByValue(varcharTypeInfo, outputColumn); + readerByReference = new VarcharReaderByReference(varcharTypeInfo, outputColumn); + } + break; + case BINARY: + readerByValue = new BinaryReaderByValue(outputColumn); + readerByReference = new BinaryReaderByReference(outputColumn); + break; + case DECIMAL: + readerByValue = new HiveDecimalReader(outputColumn); + break; + case INTERVAL_YEAR_MONTH: + readerByValue = new IntervalYearMonthReader(outputColumn); + break; + case INTERVAL_DAY_TIME: + readerByValue = new IntervalDayTimeReader(outputColumn); + break; + default: + throw new HiveException("Unexpected primitive type category " + primitiveCategory); + } + + readersByValue[index] = readerByValue; + if (readerByReference == null) { + readersByReference[index] = readerByValue; + } else { + readersByReference[index] = readerByReference; + } + } + + public void init(int[] outputColumns) throws HiveException { + + readersByValue = new Reader[primitiveTypeInfos.length]; + readersByReference = new Reader[primitiveTypeInfos.length]; + + for (int i = 0; i < primitiveTypeInfos.length; i++) { + int outputColumn = outputColumns[i]; + addReader(i, outputColumn); + } + } + + public void init(int startColumn) throws HiveException { + + readersByValue = new Reader[primitiveTypeInfos.length]; + readersByReference = new Reader[primitiveTypeInfos.length]; + + for (int i = 0; i < primitiveTypeInfos.length; i++) { + int outputColumn = startColumn + i; + addReader(i, outputColumn); + } + } + + public void init() throws HiveException { + init(0); + } + + public void setBytes(byte[] bytes, int offset, int length) { + deserializeRead.set(bytes, offset, length); + } + + public void deserializeByValue(VectorizedRowBatch batch, int batchIndex) throws IOException { + int i = 0; + try { + while (i < readersByValue.length) { + readersByValue[i].apply(batch, batchIndex); + i++; // Increment after the apply which could throw an exception. + } + } catch (EOFException e) { + throwMoreDetailedException(e, i); + } + deserializeRead.extraFieldsCheck(); + } + + public void deserializeByReference(VectorizedRowBatch batch, int batchIndex) throws IOException { + int i = 0; + try { + while (i < readersByReference.length) { + readersByReference[i].apply(batch, batchIndex); + i++; // Increment after the apply which could throw an exception. 
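(Both deserialize methods walk the readers in field order and turn a too-short row into the more detailed EOFException built just below. A hypothetical driver loop for this deserializer, assuming serializedRows is an iterable of row buffers, rowDeserializer is an instance of this class already initialized against the batch's columns, and exception handling is omitted:

    for (byte[] rowBytes : serializedRows) {
      rowDeserializer.setBytes(rowBytes, 0, rowBytes.length);
      rowDeserializer.deserializeByValue(batch, batch.size);
      batch.size++;
      // when the batch is full, forward it downstream and reset it (omitted)
    }

The catch block and the trailing extraFieldsCheck for the by-reference loop continue below.)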
+ } + } catch (EOFException e) { + throwMoreDetailedException(e, i); + } + deserializeRead.extraFieldsCheck(); + } + + private void throwMoreDetailedException(IOException e, int index) throws EOFException { + StringBuilder sb = new StringBuilder(); + sb.append("Detail: \"" + e.toString() + "\" occured for field " + index + " of " + primitiveTypeInfos.length + " fields ("); + for (int i = 0; i < primitiveTypeInfos.length; i++) { + if (i > 0) { + sb.append(", "); + } + sb.append(primitiveTypeInfos[i].getPrimitiveCategory().name()); + } + sb.append(")"); + throw new EOFException(sb.toString()); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java new file mode 100644 index 0000000..ee6939d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -0,0 +1,735 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import 
org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; + +/** + * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * extract from the Writable row Object[]. + * + * This class is abstract to allow the subclasses to control batch reuse. + */ +public abstract class VectorExtractRow { + private static final long serialVersionUID = 1L; + private static final Log LOG = LogFactory.getLog(VectorExtractRow.class); + + private boolean tolerateNullColumns; + + public VectorExtractRow() { + // UNDONE: For now allow null columns until vector_decimal_mapjoin.q is understood... + tolerateNullColumns = true; + } + + protected abstract class Extractor { + protected int columnIndex; + protected Object object; + + public Extractor(int columnIndex) { + this.columnIndex = columnIndex; + } + + public int getColumnIndex() { + return columnIndex; + } + + abstract void setColumnVector(VectorizedRowBatch batch); + + abstract void forgetColumnVector(); + + abstract Object extract(int batchIndex); + } + + private class VoidExtractor extends Extractor { + + VoidExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + } + + @Override + void forgetColumnVector() { + } + + @Override + Object extract(int batchIndex) { + return null; + } + } + + private abstract class AbstractLongExtractor extends Extractor { + + protected LongColumnVector colVector; + protected long[] vector; + + AbstractLongExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (LongColumnVector) batch.cols[columnIndex]; + vector = colVector.vector; + } + + @Override + void forgetColumnVector() { + colVector = null; + vector = null; + } + } + + protected class BooleanExtractor extends AbstractLongExtractor { + + BooleanExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.create(false); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.set(object, value == 0 ? false : true); + return object; + } else { + return null; + } + } + } + + protected class ByteExtractor extends AbstractLongExtractor { + + ByteExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableByteObjectInspector.create((byte) 0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableByteObjectInspector.set(object, (byte) value); + return object; + } else { + return null; + } + } + } + + private class ShortExtractor extends AbstractLongExtractor { + + ShortExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableShortObjectInspector.create((short) 0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableShortObjectInspector.set(object, (short) value); + return object; + } else { + return null; + } + } + } + + private class IntExtractor extends AbstractLongExtractor { + + IntExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableIntObjectInspector.create(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableIntObjectInspector.set(object, (int) value); + return object; + } else { + return null; + } + } + } + + private class LongExtractor extends AbstractLongExtractor { + + LongExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableLongObjectInspector.create(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableLongObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } + + private class DateExtractor extends AbstractLongExtractor { + + private Date date; + + DateExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableDateObjectInspector.create(new Date(0)); + date = new Date(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + date.setTime(DateWritable.daysToMillis((int) value)); + PrimitiveObjectInspectorFactory.writableDateObjectInspector.set(object, date); + return object; + } else { + return null; + } + } + } + + private class TimestampExtractor extends AbstractLongExtractor { + + private Timestamp timestamp; + + TimestampExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.create(new Timestamp(0)); + timestamp = new Timestamp(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + TimestampUtils.assignTimeInNanoSec(value, timestamp); + PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.set(object, timestamp); + return object; + } else { + return null; + } + } + } + + private class IntervalYearMonthExtractor extends AbstractLongExtractor { + + private HiveIntervalYearMonth hiveIntervalYearMonth; + + IntervalYearMonthExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.create(new HiveIntervalYearMonth(0)); + hiveIntervalYearMonth = new HiveIntervalYearMonth(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + int totalMonths = (int) vector[adjustedIndex]; + hiveIntervalYearMonth.set(totalMonths); + PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.set(object, hiveIntervalYearMonth); + return object; + } else { + return null; + } + } + } + + private class IntervalDayTimeExtractor extends AbstractLongExtractor { + + private HiveIntervalDayTime hiveIntervalDayTime; + + IntervalDayTimeExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.create(new HiveIntervalDayTime(0, 0)); + hiveIntervalDayTime = new HiveIntervalDayTime(0, 0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, value); + PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.set(object, hiveIntervalDayTime); + return object; + } else { + return null; + } + } + } + + private abstract class AbstractDoubleExtractor extends Extractor { + + protected DoubleColumnVector colVector; + protected double[] vector; + + AbstractDoubleExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (DoubleColumnVector) batch.cols[columnIndex]; + vector = colVector.vector; + } + + @Override + void forgetColumnVector() { + colVector = null; + vector = null; + } + } + + private class FloatExtractor extends AbstractDoubleExtractor { + + FloatExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableFloatObjectInspector.create(0f); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + double value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableFloatObjectInspector.set(object, (float) value); + return object; + } else { + return null; + } + } + } + + private class DoubleExtractor extends AbstractDoubleExtractor { + + DoubleExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.create(0f); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + double value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } + + private abstract class AbstractBytesExtractor extends Extractor { + + protected BytesColumnVector colVector; + + AbstractBytesExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (BytesColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + } + + private class BinaryExtractorByValue extends AbstractBytesExtractor { + + private DataOutputBuffer buffer; + + // Use the BytesWritable instance here as a reference to data saved in buffer. We do not + // want to pass the binary object inspector a byte[] since we would need to allocate it on the + // heap each time to get the length correct. 
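(The copy-and-wrap trick described in the comment above works because DataOutputBuffer exposes its backing array together with the number of valid bytes, so the reused BytesWritable can be pointed at exactly this row's slice without allocating a fresh byte[] per row. A small illustration with throwaway data, using only the Hadoop DataOutputBuffer/BytesWritable API:

    DataOutputBuffer scratch = new DataOutputBuffer();
    byte[] columnBytes = {1, 2, 3, 4, 5};
    scratch.reset();                                      // reuse the same backing array for every row
    try {
      scratch.write(columnBytes, 1, 3);                   // copy only this row's slice: {2, 3, 4}
    } catch (IOException ioe) {
      throw new IllegalStateException("bad write", ioe);
    }
    BytesWritable copy = new BytesWritable();
    copy.set(scratch.getData(), 0, scratch.getLength());  // getData() may be longer than 3; getLength() is exact

The reusable buffer and BytesWritable fields that the comment introduces follow below.)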
+ private BytesWritable bytesWritable; + + BinaryExtractorByValue(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); + buffer = new DataOutputBuffer(); + bytesWritable = new BytesWritable(); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + byte[] bytes = colVector.vector[adjustedIndex]; + int start = colVector.start[adjustedIndex]; + int length = colVector.length[adjustedIndex]; + + // Save a copy of the binary data. + buffer.reset(); + try { + buffer.write(bytes, start, length); + } catch (IOException ioe) { + throw new IllegalStateException("bad write", ioe); + } + + bytesWritable.set(buffer.getData(), 0, buffer.getLength()); + PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.set(object, bytesWritable); + return object; + } else { + return null; + } + } + } + + private class StringExtractorByValue extends AbstractBytesExtractor { + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + StringExtractorByValue(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableStringObjectInspector.create(StringUtils.EMPTY); + text = new Text(); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + byte[] value = colVector.vector[adjustedIndex]; + int start = colVector.start[adjustedIndex]; + int length = colVector.length[adjustedIndex]; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + text.set(value, start, length); + + PrimitiveObjectInspectorFactory.writableStringObjectInspector.set(object, text); + return object; + } else { + return null; + } + } + } + + private class VarCharExtractorByValue extends AbstractBytesExtractor { + + // We need our own instance of the VARCHAR object inspector to hold the maximum length + // from the TypeInfo. + private WritableHiveVarcharObjectInspector writableVarcharObjectInspector; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + /* + * @param varcharTypeInfo + * We need the VARCHAR type information that contains the maximum length. + * @param columnIndex + * The vector row batch column that contains the bytes for the VARCHAR. + */ + VarCharExtractorByValue(VarcharTypeInfo varcharTypeInfo, int columnIndex) { + super(columnIndex); + writableVarcharObjectInspector = new WritableHiveVarcharObjectInspector(varcharTypeInfo); + object = writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + text = new Text(); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + byte[] value = colVector.vector[adjustedIndex]; + int start = colVector.start[adjustedIndex]; + int length = colVector.length[adjustedIndex]; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. 
+ text.set(value, start, length); + + writableVarcharObjectInspector.set(object, text.toString()); + return object; + } else { + return null; + } + } + } + + private class CharExtractorByValue extends AbstractBytesExtractor { + + // We need our own instance of the CHAR object inspector to hold the maximum length + // from the TypeInfo. + private WritableHiveCharObjectInspector writableCharObjectInspector; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + /* + * @param charTypeInfo + * We need the CHAR type information that contains the maximum length. + * @param columnIndex + * The vector row batch column that contains the bytes for the CHAR. + */ + CharExtractorByValue(CharTypeInfo charTypeInfo, int columnIndex) { + super(columnIndex); + writableCharObjectInspector = new WritableHiveCharObjectInspector(charTypeInfo); + object = writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + text = new Text(); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + byte[] value = colVector.vector[adjustedIndex]; + int start = colVector.start[adjustedIndex]; + int length = colVector.length[adjustedIndex]; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + text.set(value, start, length); + + writableCharObjectInspector.set(object, text.toString()); + return object; + } else { + return null; + } + } + } + + private class DecimalExtractor extends Extractor { + + private WritableHiveDecimalObjectInspector writableDecimalObjectInspector; + protected DecimalColumnVector colVector; + + /* + * @param decimalTypeInfo + * We need the DECIMAL type information that contains scale and precision. + * @param columnIndex + * The vector row batch column that contains the DECIMAL values. + */ + DecimalExtractor(DecimalTypeInfo decimalTypeInfo, int columnIndex) { + super(columnIndex); + writableDecimalObjectInspector = new WritableHiveDecimalObjectInspector(decimalTypeInfo); + object = writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (DecimalColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ?
0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + HiveDecimal value = colVector.vector[adjustedIndex].getHiveDecimal(); + writableDecimalObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } + + private Extractor createExtractor(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException { + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + Extractor extracter; + switch (primitiveCategory) { + case VOID: + extracter = new VoidExtractor(columnIndex); + break; + case BOOLEAN: + extracter = new BooleanExtractor(columnIndex); + break; + case BYTE: + extracter = new ByteExtractor(columnIndex); + break; + case SHORT: + extracter = new ShortExtractor(columnIndex); + break; + case INT: + extracter = new IntExtractor(columnIndex); + break; + case LONG: + extracter = new LongExtractor(columnIndex); + break; + case TIMESTAMP: + extracter = new TimestampExtractor(columnIndex); + break; + case DATE: + extracter = new DateExtractor(columnIndex); + break; + case FLOAT: + extracter = new FloatExtractor(columnIndex); + break; + case DOUBLE: + extracter = new DoubleExtractor(columnIndex); + break; + case BINARY: + extracter = new BinaryExtractorByValue(columnIndex); + break; + case STRING: + extracter = new StringExtractorByValue(columnIndex); + break; + case VARCHAR: + extracter = new VarCharExtractorByValue((VarcharTypeInfo) primitiveTypeInfo, columnIndex); + break; + case CHAR: + extracter = new CharExtractorByValue((CharTypeInfo) primitiveTypeInfo, columnIndex); + break; + case DECIMAL: + extracter = new DecimalExtractor((DecimalTypeInfo) primitiveTypeInfo, columnIndex); + break; + case INTERVAL_YEAR_MONTH: + extracter = new IntervalYearMonthExtractor(columnIndex); + break; + case INTERVAL_DAY_TIME: + extracter = new IntervalDayTimeExtractor(columnIndex); + break; + default: + throw new HiveException("No vector row extracter for primitive category " + + primitiveCategory); + } + return extracter; + } + + Extractor[] extracters; + + public void init(StructObjectInspector structObjectInspector, List projectedColumns) throws HiveException { + + extracters = new Extractor[projectedColumns.size()]; + + List fields = structObjectInspector.getAllStructFieldRefs(); + + int i = 0; + for (StructField field : fields) { + int columnIndex = projectedColumns.get(i); + ObjectInspector fieldInspector = field.getFieldObjectInspector(); + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( + fieldInspector.getTypeName()); + extracters[i] = createExtractor(primitiveTypeInfo, columnIndex); + i++; + } + } + + public void init(List typeNames) throws HiveException { + + extracters = new Extractor[typeNames.size()]; + + int i = 0; + for (String typeName : typeNames) { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + extracters[i] = createExtractor(primitiveTypeInfo, i); + i++; + } + } + + public int getCount() { + return extracters.length; + } + + protected void setBatch(VectorizedRowBatch batch) throws HiveException { + + for (int i = 0; i < extracters.length; i++) { + Extractor extracter = extracters[i]; + int columnIndex = extracter.getColumnIndex(); + if (batch.cols[columnIndex] == null) { + if (tolerateNullColumns) { + // Replace with void... 
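(With the per-type extractors and the two init variants above in place, a caller typically builds the extractor array once and then pulls each batch row into a reusable Object[] of writables. A hypothetical sketch using the dynamic-batch subclass defined a little further below; typeNames is an assumed list of hive type name strings and exception handling is omitted:

    VectorExtractRowDynBatch extractRow = new VectorExtractRowDynBatch();
    extractRow.init(typeNames);
    Object[] row = new Object[extractRow.getCount()];
    extractRow.setBatchOnEntry(batch);
    for (int i = 0; i < batch.size; i++) {
      int batchIndex = batch.selectedInUse ? batch.selected[i] : i;
      extractRow.extractRow(batchIndex, row);   // row now holds reusable writables, or null per NULL field
      // hand row to a row-mode operator or serializer (omitted)
    }
    extractRow.forgetBatchOnExit();

The null-column fallback that the comment above introduces continues below with a VoidExtractor.)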
+ extracter = new VoidExtractor(columnIndex); + extracters[i] = extracter; + } else { + throw new HiveException("Unexpected null vector column " + columnIndex); + } + } + extracter.setColumnVector(batch); + } + } + + protected void forgetBatch() { + for (Extractor extracter : extracters) { + extracter.forgetColumnVector(); + } + } + + public Object extractRowColumn(int batchIndex, int logicalColumnIndex) { + return extracters[logicalColumnIndex].extract(batchIndex); + } + + public void extractRow(int batchIndex, Object[] objects) { + int i = 0; + for (Extractor extracter : extracters) { + objects[i++] = extracter.extract(batchIndex); + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java new file mode 100644 index 0000000..0ff7145 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * extract from the Writable row Object[]. + * + * This class is for use when the batch being extracted from may change on each call. + */ +public class VectorExtractRowDynBatch extends VectorExtractRow { + + public void setBatchOnEntry(VectorizedRowBatch batch) throws HiveException { + setBatch(batch); + } + + public void forgetBatchOnExit() { + forgetBatch(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java new file mode 100644 index 0000000..faec0aa --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * extract from the Writable row Object[]. + * + * This class is for use when the batch being assigned is always the same. + */ +public class VectorExtractRowSameBatch extends VectorExtractRow { + + public void setOneBatch(VectorizedRowBatch batch) throws HiveException { + setBatch(batch); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 4ca82e4..bd4fb42 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -766,8 +766,7 @@ public VectorGroupByOperator(VectorizationContext vContext, OperatorDesc conf) isVectorOutput = desc.getVectorDesc().isVectorOutput(); - vOutContext = new VectorizationContext(desc.getOutputColumnNames()); - vOutContext.setFileKey(vContext.getFileKey() + "/_GROUPBY_"); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); } public VectorGroupByOperator() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 6efc849..bbc8d60 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -120,8 +120,7 @@ public VectorMapJoinOperator (VectorizationContext vContext, OperatorDesc conf) bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); // We are making a new output vectorized row batch. - vOutContext = new VectorizationContext(desc.getOutputColumnNames()); - vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + desc.getBigTableAlias()); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index ebb6840..1fe5c4e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -117,8 +117,7 @@ public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); // We are making a new output vectorized row batch. 
- vOutContext = new VectorizationContext(desc.getOutputColumnNames()); - vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias()); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index f854bdb..212aa99 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -68,10 +68,7 @@ public VectorSelectOperator(VectorizationContext vContext, OperatorDesc conf) * Create a new vectorization context to create a new projection, but keep * same output column manager must be inherited to track the scratch the columns. */ - vOutContext = new VectorizationContext(vContext); - - // Set a fileKey, although this operator doesn't use it. - vOutContext.setFileKey(vContext.getFileKey() + "/_SELECT_"); + vOutContext = new VectorizationContext(getName(), vContext); vOutContext.resetProjectionColumns(); for (int i=0; i < colList.size(); ++i) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java new file mode 100644 index 0000000..ceb18c6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java @@ -0,0 +1,626 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Timestamp; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +/** + * This class serializes columns from a row in a VectorizedRowBatch into a serialization format. + * + * The caller provides the hive type names and column numbers in the order desired to + * serialize. 
+ * + * This class uses a provided SerializeWrite object to directly serialize by writing + * field-by-field into a serialization format from the primitive values of the VectorizedRowBatch. + * + * Note that when serializing a row, the logical mapping using the batch's selected array has already + * been performed. + */ +public class VectorSerializeRow { + + private SerializeWrite serializeWrite; + + public VectorSerializeRow(SerializeWrite serializeWrite) { + this(); + this.serializeWrite = serializeWrite; + } + + // Not public since we must have the serialize write object. + private VectorSerializeRow() { + } + + private abstract class Writer { + protected int columnIndex; + + Writer(int columnIndex) { + this.columnIndex = columnIndex; + } + + abstract boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException; + } + + private abstract class AbstractLongWriter extends Writer { + + AbstractLongWriter(int columnIndex) { + super(columnIndex); + } + } + + private class BooleanWriter extends AbstractLongWriter { + + BooleanWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeBoolean(colVector.vector[0] != 0); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeBoolean(colVector.vector[batchIndex] != 0); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class ByteWriter extends AbstractLongWriter { + + ByteWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeByte((byte) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeByte((byte) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class ShortWriter extends AbstractLongWriter { + + ShortWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeShort((short) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeShort((short) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class IntWriter extends AbstractLongWriter { + + IntWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { +
serializeWrite.writeInt((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeInt((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class LongWriter extends AbstractLongWriter { + + LongWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeLong(colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeLong(colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class DateWriter extends AbstractLongWriter { + + DateWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeDate((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeDate((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class TimestampWriter extends AbstractLongWriter { + + Timestamp scratchTimestamp; + + TimestampWriter(int columnIndex) { + super(columnIndex); + scratchTimestamp = new Timestamp(0); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + TimestampUtils.assignTimeInNanoSec(colVector.vector[0], scratchTimestamp); + serializeWrite.writeTimestamp(scratchTimestamp); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + TimestampUtils.assignTimeInNanoSec(colVector.vector[batchIndex], scratchTimestamp); + serializeWrite.writeTimestamp(scratchTimestamp); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class IntervalYearMonthWriter extends AbstractLongWriter { + + IntervalYearMonthWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class 
IntervalDayTimeWriter extends AbstractLongWriter { + + IntervalDayTimeWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeHiveIntervalDayTime(colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeHiveIntervalDayTime(colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private abstract class AbstractDoubleWriter extends Writer { + + AbstractDoubleWriter(int columnIndex) { + super(columnIndex); + } + } + + private class FloatWriter extends AbstractDoubleWriter { + + FloatWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeFloat((float) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeFloat((float) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class DoubleWriter extends AbstractDoubleWriter { + + DoubleWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeDouble(colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeDouble(colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class StringWriter extends Writer { + + StringWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeString(colVector.vector[0], colVector.start[0], colVector.length[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeString(colVector.vector[batchIndex], + colVector.start[batchIndex], colVector.length[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class BinaryWriter extends Writer { + + BinaryWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + 
serializeWrite.writeBinary(colVector.vector[0], colVector.start[0], colVector.length[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeBinary(colVector.vector[batchIndex], + colVector.start[batchIndex], colVector.length[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class HiveDecimalWriter extends Writer { + protected HiveDecimalWritable[] vector; + + HiveDecimalWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeHiveDecimal(colVector.vector[0].getHiveDecimal()); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeHiveDecimal(colVector.vector[batchIndex].getHiveDecimal()); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private Writer[] writers; + + private Writer createWriter(TypeInfo typeInfo, int columnIndex) throws HiveException { + Writer writer; + Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + // case VOID: + // UNDONE: + // break; + case BOOLEAN: + writer = new BooleanWriter(columnIndex); + break; + case BYTE: + writer = new ByteWriter(columnIndex); + break; + case SHORT: + writer = new ShortWriter(columnIndex); + break; + case INT: + writer = new IntWriter(columnIndex); + break; + case LONG: + writer = new LongWriter(columnIndex); + break; + case DATE: + writer = new DateWriter(columnIndex); + break; + case TIMESTAMP: + writer = new TimestampWriter(columnIndex); + break; + case FLOAT: + writer = new FloatWriter(columnIndex); + break; + case DOUBLE: + writer = new DoubleWriter(columnIndex); + break; + case STRING: + case CHAR: + case VARCHAR: + // We store CHAR and VARCHAR without pads, so use STRING writer class. 
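(CHAR values are stored in the column vectors already right-trimmed and VARCHAR values are stored unpadded, so both categories reuse the STRING writer assigned just below. Once the writers are built, a caller drives this class through the init/setOutput/serializeWrite methods defined after the switch; a hypothetical sketch, where serializeWriteImpl is some SerializeWrite implementation for the target format, typeNames lists the hive types per column, and exception handling is omitted:

    VectorSerializeRow serializeRow = new VectorSerializeRow(serializeWriteImpl);
    serializeRow.init(typeNames);
    Output output = new Output();
    serializeRow.setOutput(output);   // how the buffer is reset or appended between rows is up to the SerializeWrite implementation
    for (int i = 0; i < batch.size; i++) {
      int batchIndex = batch.selectedInUse ? batch.selected[i] : i;
      boolean anyNulls = serializeRow.serializeWrite(batch, batchIndex);
      // the serialized bytes for this row are now in output; anyNulls reports whether any field was NULL
    }

The switch continues below with that STRING writer assignment.)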
+ writer = new StringWriter(columnIndex); + break; + case BINARY: + writer = new BinaryWriter(columnIndex); + break; + case DECIMAL: + writer = new HiveDecimalWriter(columnIndex); + break; + case INTERVAL_YEAR_MONTH: + writer = new IntervalYearMonthWriter(columnIndex); + break; + case INTERVAL_DAY_TIME: + writer = new IntervalDayTimeWriter(columnIndex); + break; + default: + throw new HiveException("Unexpected primitive type category " + primitiveCategory); + } + } + break; + default: + throw new HiveException("Unexpected type category " + category); + } + return writer; + } + + public void init(List typeNames, int[] columnMap) throws HiveException { + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + int columnIndex = columnMap[i]; + Writer writer = createWriter(typeInfo, columnIndex); + writers[i] = writer; + } + } + + public void init(List typeNames) throws HiveException { + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Writer writer = createWriter(typeInfo, i); + writers[i] = writer; + } + } + + public int getCount() { + return writers.length; + } + + public void setOutput(Output output) { + serializeWrite.set(output); + } + + /* + * Note that when serializing a row, the logical mapping using selected in use has already + * been performed. batchIndex is the actual index of the row. + */ + public boolean serializeWrite(VectorizedRowBatch batch, int batchIndex) throws IOException { + boolean anyNulls = false; + for (Writer writer : writers) { + if (!writer.apply(batch, batchIndex)) { + anyNulls = true; + } + } + return anyNulls; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRowNoNulls.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRowNoNulls.java new file mode 100644 index 0000000..4a9fdcd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRowNoNulls.java @@ -0,0 +1,395 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Timestamp; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +/** + * This class serializes columns from a row in a VectorizedRowBatch into a serialization format. + * + * The caller provides the hive type names and column numbers in the order desired to + * serialize. + * + * This class uses an provided SerializeWrite object to directly serialize by writing + * field-by-field into a serialization format from the primitive values of the VectorizedRowBatch. + * + * Note that when serializing a row, the logical mapping using selected in use has already + * been performed. + * + * NOTE: This class is a variation of VectorSerializeRow for serialization of columns that + * have no nulls. + */ +public class VectorSerializeRowNoNulls { + private static final Log LOG = LogFactory.getLog(VectorSerializeRowNoNulls.class.getName()); + + private SerializeWrite serializeWrite; + + public VectorSerializeRowNoNulls(SerializeWrite serializeWrite) { + this(); + this.serializeWrite = serializeWrite; + } + + // Not public since we must have the serialize write object. + private VectorSerializeRowNoNulls() { + } + + private abstract class Writer { + protected int columnIndex; + + Writer(int columnIndex) { + this.columnIndex = columnIndex; + } + + abstract void apply(VectorizedRowBatch batch, int batchIndex) throws IOException; + } + + private abstract class AbstractLongWriter extends Writer { + + AbstractLongWriter(int columnIndex) { + super(columnIndex); + } + } + + private class BooleanWriter extends AbstractLongWriter { + + BooleanWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeBoolean(colVector.vector[colVector.isRepeating ? 0 : batchIndex] != 0); + } + } + + private class ByteWriter extends AbstractLongWriter { + + ByteWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeByte((byte) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class ShortWriter extends AbstractLongWriter { + + ShortWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeShort((short) colVector.vector[colVector.isRepeating ? 
0 : batchIndex]); + } + } + + private class IntWriter extends AbstractLongWriter { + + IntWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeInt((int) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class LongWriter extends AbstractLongWriter { + + LongWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeLong(colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class DateWriter extends AbstractLongWriter { + + DateWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeDate((int) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class TimestampWriter extends AbstractLongWriter { + + Timestamp scratchTimestamp; + + TimestampWriter(int columnIndex) { + super(columnIndex); + scratchTimestamp = new Timestamp(0); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + TimestampUtils.assignTimeInNanoSec(colVector.vector[colVector.isRepeating ? 0 : batchIndex], scratchTimestamp); + serializeWrite.writeTimestamp(scratchTimestamp); + } + } + + private class IntervalYearMonthWriter extends AbstractLongWriter { + + IntervalYearMonthWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class IntervalDayTimeWriter extends AbstractLongWriter { + + IntervalDayTimeWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeHiveIntervalDayTime(colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private abstract class AbstractDoubleWriter extends Writer { + + AbstractDoubleWriter(int columnIndex) { + super(columnIndex); + } + } + + private class FloatWriter extends AbstractDoubleWriter { + + FloatWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + serializeWrite.writeFloat((float) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class DoubleWriter extends AbstractDoubleWriter { + + DoubleWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + serializeWrite.writeDouble(colVector.vector[colVector.isRepeating ? 
0 : batchIndex]); + } + } + + private class StringWriter extends Writer { + + StringWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + serializeWrite.writeString(colVector.vector[0], colVector.start[0], colVector.length[0]); + } else { + serializeWrite.writeString(colVector.vector[batchIndex], colVector.start[batchIndex], colVector.length[batchIndex]); + } + } + } + + private class BinaryWriter extends Writer { + + BinaryWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + serializeWrite.writeBinary(colVector.vector[0], colVector.start[0], colVector.length[0]); + } else { + serializeWrite.writeBinary(colVector.vector[batchIndex], colVector.start[batchIndex], colVector.length[batchIndex]); + } + } + } + + private class HiveDecimalWriter extends Writer { + + HiveDecimalWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + serializeWrite.writeHiveDecimal(colVector.vector[colVector.isRepeating ? 0 : batchIndex].getHiveDecimal()); + } + } + + private Writer[] writers; + + private Writer createWriter(TypeInfo typeInfo, int columnIndex) throws HiveException { + Writer writer; + Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + // case VOID: + // UNDONE: + // break; + case BOOLEAN: + writer = new BooleanWriter(columnIndex); + break; + case BYTE: + writer = new ByteWriter(columnIndex); + break; + case SHORT: + writer = new ShortWriter(columnIndex); + break; + case INT: + writer = new IntWriter(columnIndex); + break; + case LONG: + writer = new LongWriter(columnIndex); + break; + case DATE: + writer = new DateWriter(columnIndex); + break; + case TIMESTAMP: + writer = new TimestampWriter(columnIndex); + break; + case FLOAT: + writer = new FloatWriter(columnIndex); + break; + case DOUBLE: + writer = new DoubleWriter(columnIndex); + break; + case STRING: + case CHAR: + case VARCHAR: + // We store CHAR and VARCHAR without pads, so use STRING writer class. 
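Unlike the writers in VectorSerializeRow, the writers above do no null bookkeeping at all; each one reduces to the same indexing step, sketched here for the long case (illustrative, not part of the patch), which is why this class is only safe for columns known to contain no nulls.

  // Repeating columns keep their single value at slot 0; everything else is read at batchIndex.
  LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
  serializeWrite.writeLong(colVector.vector[colVector.isRepeating ? 0 : batchIndex]);
  // Caller side: serializeWriteNoNulls(batch, batchIndex) simply applies every writer in order.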
+ writer = new StringWriter(columnIndex); + break; + case BINARY: + writer = new BinaryWriter(columnIndex); + break; + case DECIMAL: + writer = new HiveDecimalWriter(columnIndex); + break; + case INTERVAL_YEAR_MONTH: + writer = new IntervalYearMonthWriter(columnIndex); + break; + case INTERVAL_DAY_TIME: + writer = new IntervalDayTimeWriter(columnIndex); + break; + default: + throw new HiveException("Unexpected primitive type category " + primitiveCategory); + } + } + break; + default: + throw new HiveException("Unexpected type category " + category); + } + return writer; + } + + public void init(List typeNames, int[] columnMap) throws HiveException { + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + int columnIndex = columnMap[i]; + Writer writer = createWriter(typeInfo, columnIndex); + writers[i] = writer; + } + } + + public void init(List typeNames) throws HiveException { + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Writer writer = createWriter(typeInfo, i); + writers[i] = writer; + } + } + + public int getCount() { + return writers.length; + } + + public void setOutput(Output output) { + serializeWrite.set(output); + } + + /* + * Note that when serializing a row, the logical mapping using selected in use has already + * been performed. batchIndex is the actual index of the row. + */ + public void serializeWriteNoNulls(VectorizedRowBatch batch, int batchIndex) throws IOException { + for (Writer writer : writers) { + writer.apply(batch, batchIndex); + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 14a1059..90c34cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -31,8 +31,10 @@ import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; import java.util.regex.Pattern; +import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -128,6 +130,9 @@ private static final Log LOG = LogFactory.getLog( VectorizationContext.class.getName()); + private String contextName; + private int level; + VectorExpressionDescriptor vMap; private List projectedColumns; @@ -140,7 +145,10 @@ // Convenient constructor for initial batch creation takes // a list of columns names and maps them to 0..n-1 indices. - public VectorizationContext(List initialColumnNames) { + public VectorizationContext(String contextName, List initialColumnNames) { + this.contextName = contextName; + level = 0; + LOG.info("VectorizationContext consructor contextName " + contextName + " level " + level + " initialColumnNames " + initialColumnNames.toString()); this.projectionColumnNames = initialColumnNames; projectedColumns = new ArrayList(); @@ -157,8 +165,11 @@ public VectorizationContext(List initialColumnNames) { // Constructor to with the individual addInitialColumn method // followed by a call to finishedAddingInitialColumns. 
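With the constructor changes above, every VectorizationContext now carries a descriptive context name (and a nesting level) in place of the old file key. A construction sketch under that assumption; the context names and column names here are invented for illustration.

  // List-based style: the given column names are mapped to indices 0..n-1.
  List<String> columnNames = Arrays.asList("key", "value");
  VectorizationContext mapContext = new VectorizationContext("Test Map Task", columnNames);

  // Incremental style: add initial columns one at a time, then seal the set.
  VectorizationContext reduceContext = new VectorizationContext("Test Reduce Task");
  reduceContext.addInitialColumn("_col0");
  reduceContext.addInitialColumn("_col1");
  reduceContext.finishedAddingInitialColumns();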
- public VectorizationContext() { - projectedColumns = new ArrayList(); + public VectorizationContext(String contextName) { + this.contextName = contextName; + level = 0; + LOG.info("VectorizationContext consructor contextName " + contextName + " level " + level); + projectedColumns = new ArrayList(); projectionColumnNames = new ArrayList(); projectionColumnMap = new HashMap(); this.ocm = new OutputColumnManager(0); @@ -169,7 +180,10 @@ public VectorizationContext() { // Constructor useful making a projection vectorization context. // Use with resetProjectionColumns and addProjectionColumn. // Keeps existing output column map, etc. - public VectorizationContext(VectorizationContext vContext) { + public VectorizationContext(String contextName, VectorizationContext vContext) { + this.contextName = contextName; + level = vContext.level + 1; + LOG.info("VectorizationContext consructor reference contextName " + contextName + " level " + level); this.projectedColumns = new ArrayList(); this.projectionColumnNames = new ArrayList(); this.projectionColumnMap = new HashMap(); @@ -238,13 +252,6 @@ public void addProjectionColumn(String columnName, int vectorBatchColIndex) { //Map column number to type private OutputColumnManager ocm; - // File key is used by operators to retrieve the scratch vectors - // from mapWork at runtime. The operators that modify the structure of - // a vector row batch, need to allocate scratch vectors as well. Every - // operator that creates a new Vectorization context should set a unique - // fileKey. - private String fileKey = null; - // Set of UDF classes for type casting data types in row-mode. private static Set> castExpressionUdfs = new HashSet>(); static { @@ -268,14 +275,6 @@ public void addProjectionColumn(String columnName, int vectorBatchColIndex) { castExpressionUdfs.add(UDFToShort.class); } - public String getFileKey() { - return fileKey; - } - - public void setFileKey(String fileKey) { - this.fileKey = fileKey; - } - protected int getInputColumnIndex(String name) throws HiveException { if (name == null) { throw new HiveException("Null column name"); @@ -316,6 +315,7 @@ int allocateOutputColumn(String hiveTypeName) { // We need to differentiate DECIMAL columns by their precision and scale... 
String normalizedTypeName = getNormalizedName(hiveTypeName); int relativeCol = allocateOutputColumnInternal(normalizedTypeName); + // LOG.info("allocateOutputColumn for hiveTypeName " + hiveTypeName + " column " + (initialOutputCol + relativeCol)); return initialOutputCol + relativeCol; } @@ -357,6 +357,22 @@ void freeOutputColumn(int index) { usedOutputColumns.remove(index-initialOutputCol); } } + + public int[] currentScratchColumns() { + TreeSet treeSet = new TreeSet(); + for (Integer col : usedOutputColumns) { + treeSet.add(initialOutputCol + col); + } + return ArrayUtils.toPrimitive(treeSet.toArray(new Integer[0])); + } + } + + public int allocateScratchColumn(String hiveTypeName) { + return ocm.allocateOutputColumn(hiveTypeName); + } + + public int[] currentScratchColumns() { + return ocm.currentScratchColumns(); } private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc @@ -2106,6 +2122,10 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b "\" for type: \"" + inputType.name() + " (reduce-side = " + isReduce + ")"); } + public int firstOutputColumnIndex() { + return firstOutputColumnIndex; + } + public Map getScratchColumnTypeMap() { Map map = new HashMap(); for (int i = 0; i < ocm.outputColCount; i++) { @@ -2117,7 +2137,7 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, b public String toString() { StringBuilder sb = new StringBuilder(32); - sb.append("Context key ").append(getFileKey()).append(", "); + sb.append("Context name ").append(contextName).append(", level " + level + ", "); Comparator comparerInteger = new Comparator() { @Override @@ -2129,11 +2149,11 @@ public int compare(Integer o1, Integer o2) { for (Map.Entry entry : projectionColumnMap.entrySet()) { sortedColumnMap.put(entry.getValue(), entry.getKey()); } - sb.append("sortedProjectionColumnMap ").append(sortedColumnMap).append(", "); + sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", "); Map sortedScratchColumnTypeMap = new TreeMap(comparerInteger); sortedScratchColumnTypeMap.putAll(getScratchColumnTypeMap()); - sb.append("sortedScratchColumnTypeMap ").append(sortedScratchColumnTypeMap); + sb.append("sorted scratchColumnTypeMap ").append(sortedScratchColumnTypeMap); return sb.toString(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 96f1a6e..e9cf6b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.sql.Timestamp; +import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -29,7 +30,6 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -43,10 +43,15 @@ import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 
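The scratch-column accessors added above let an operator that restructures the batch allocate typed scratch columns directly and later report which ones are in use; a hedged sketch (vContext and the chosen type are illustrative):

  // Reserve a typed scratch column for an intermediate result.
  int scratchColumnIndex = vContext.allocateScratchColumn("double");
  // Inspect the allocation state that will eventually be published on the work object.
  int[] scratchColumnsInUse = vContext.currentScratchColumns();
  Map<Integer, String> scratchColumnTypes = vContext.getScratchColumnTypeMap();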
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; @@ -542,5 +547,98 @@ private static void setVector(Object row, poi.getPrimitiveCategory()); } } -} + public static StandardStructObjectInspector convertToStandardStructObjectInspector( + StructObjectInspector structObjectInspector) throws HiveException { + + List fields = structObjectInspector.getAllStructFieldRefs(); + List oids = new ArrayList(); + ArrayList columnNames = new ArrayList(); + + for(StructField field : fields) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( + field.getFieldObjectInspector().getTypeName()); + ObjectInspector standardWritableObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo); + oids.add(standardWritableObjectInspector); + columnNames.add(field.getFieldName()); + } + return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames,oids); + } + + public static PrimitiveTypeInfo[] primitiveTypeInfosFromStructObjectInspector( + StructObjectInspector structObjectInspector) throws HiveException { + + List fields = structObjectInspector.getAllStructFieldRefs(); + PrimitiveTypeInfo[] result = new PrimitiveTypeInfo[fields.size()]; + + int i = 0; + for(StructField field : fields) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( + field.getFieldObjectInspector().getTypeName()); + result[i++] = (PrimitiveTypeInfo) typeInfo; + } + return result; + } + + + public static String displayBytes(byte[] bytes, int start, int length) { + StringBuilder sb = new StringBuilder(); + for (int i = start; i < start + length; i++) { + char ch = (char) bytes[i]; + if (ch < ' ' || ch > '~') { + sb.append(String.format("\\%03d", (int) (bytes[i] & 0xff))); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) { + StringBuffer sb = new StringBuffer(); + sb.append(prefix + " row " + index + " "); + for (int i = 0; i < batch.projectionSize; i++) { + int column = batch.projectedColumns[i]; + ColumnVector colVector = batch.cols[column]; + if (colVector == null) { + sb.append("(null colVector " + column + ")"); + } else { + boolean isRepeating = colVector.isRepeating; + index = (isRepeating ? 
0 : index); + if (colVector.noNulls || !colVector.isNull[index]) { + if (colVector instanceof LongColumnVector) { + sb.append(((LongColumnVector) colVector).vector[index]); + } else if (colVector instanceof DoubleColumnVector) { + sb.append(((DoubleColumnVector) colVector).vector[index]); + } else if (colVector instanceof BytesColumnVector) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector; + byte[] bytes = bytesColumnVector.vector[index]; + int start = bytesColumnVector.start[index]; + int length = bytesColumnVector.length[index]; + if (bytes == null) { + sb.append("(Unexpected null bytes with start " + start + " length " + length + ")"); + } else { + sb.append(displayBytes(bytes, start, length)); + } + } else if (colVector instanceof DecimalColumnVector) { + sb.append(((DecimalColumnVector) colVector).vector[index].toString()); + } else { + sb.append("Unknown"); + } + } else { + sb.append("NULL"); + } + } + sb.append(" "); + } + System.out.println(sb.toString()); + } + + public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) throws HiveException { + for (int i = 0; i < batch.size; i++) { + int index = (batch.selectedInUse ? batch.selected[i] : i); + debugDisplayOneRow(batch, index, prefix); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 4364572..7e41384 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -190,6 +190,7 @@ public void setValueWriters(VectorExpressionWriter[] valueWriters) { * - sets size to 0 * - sets endOfFile to false * - resets each column + * - inits each column */ public void reset() { selectedInUse = false; @@ -198,6 +199,7 @@ public void reset() { for (ColumnVector vc : cols) { if (vc != null) { vc.reset(); + vc.init(); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index b48c2ca..77c3652 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -42,7 +42,6 @@ import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; @@ -174,9 +173,8 @@ public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundEx split.getPath(), IOPrepareCache.get().getPartitionDescMap()); String partitionPath = split.getPath().getParent().toString(); - scratchColumnTypeMap = Utilities - .getMapWorkAllScratchColumnVectorTypeMaps(hiveConf) - .get(partitionPath); + scratchColumnTypeMap = Utilities.getMapWorkVectorScratchColumnTypeMap(hiveConf); + // LOG.info("VectorizedRowBatchCtx init scratchColumnTypeMap " + scratchColumnTypeMap.toString()); Properties partProps = (part.getPartSpec() == null || part.getPartSpec().isEmpty()) ? 
@@ -631,7 +629,7 @@ private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveExcepti for (int i = origNumCols; i < newNumCols; i++) { String typeName = scratchColumnTypeMap.get(i); if (typeName == null) { - throw new HiveException("No type found for column type entry " + i); + throw new HiveException("No type entry found for column " + i + " in map " + scratchColumnTypeMap.toString()); } vrb.cols[i] = allocateColumnVector(typeName, VectorizedRowBatch.DEFAULT_SIZE); @@ -646,7 +644,7 @@ private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveExcepti * @param decimalType The given decimal type string. * @return An integer array of size 2 with first element set to precision and second set to scale. */ - private int[] getScalePrecisionFromDecimalType(String decimalType) { + private static int[] getScalePrecisionFromDecimalType(String decimalType) { Pattern p = Pattern.compile("\\d+"); Matcher m = p.matcher(decimalType); m.find(); @@ -657,7 +655,7 @@ private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveExcepti return precScale; } - private ColumnVector allocateColumnVector(String type, int defaultSize) { + public static ColumnVector allocateColumnVector(String type, int defaultSize) { if (type.equalsIgnoreCase("double")) { return new DoubleColumnVector(defaultSize); } else if (VectorizationContext.isStringFamily(type)) { @@ -675,18 +673,4 @@ private ColumnVector allocateColumnVector(String type, int defaultSize) { throw new Error("Cannot allocate vector column for " + type); } } - - public VectorColumnAssign[] buildObjectAssigners(VectorizedRowBatch outputBatch) - throws HiveException { - List fieldRefs = rowOI.getAllStructFieldRefs(); - assert outputBatch.numCols == fieldRefs.size(); - VectorColumnAssign[] assigners = new VectorColumnAssign[fieldRefs.size()]; - for(int i = 0; i < assigners.length; ++i) { - StructField fieldRef = fieldRefs.get(i); - ObjectInspector fieldOI = fieldRef.getFieldObjectInspector(); - assigners[i] = VectorColumnAssignFactory.buildObjectAssign( - outputBatch, i, fieldOI); - } - return assigners; - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 44ab1bd..5f731d7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -136,7 +136,8 @@ Set supportedAggregationUdfs = new HashSet(); - private PhysicalContext physicalContext = null;; + private PhysicalContext physicalContext = null; + private HiveConf hiveConf; public Vectorizer() { @@ -286,13 +287,13 @@ public Vectorizer() { class VectorizationDispatcher implements Dispatcher { - private final PhysicalContext pctx; + private final PhysicalContext physicalContext; private List reduceColumnNames; private List reduceTypeInfos; - public VectorizationDispatcher(PhysicalContext pctx) { - this.pctx = pctx; + public VectorizationDispatcher(PhysicalContext physicalContext) { + this.physicalContext = physicalContext; reduceColumnNames = null; reduceTypeInfos = null; } @@ -310,7 +311,7 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) convertMapWork((MapWork) w, true); } else if (w instanceof ReduceWork) { // We are only vectorizing Reduce under Tez. 
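Since allocateColumnVector is now public and static, code outside VectorizedRowBatchCtx can build correctly typed scratch vectors from a type-name map, mirroring what addScratchColumnsToBatch does internally; a sketch (the map and batch variables are illustrative):

  for (Map.Entry<Integer, String> entry : scratchColumnTypeMap.entrySet()) {
    // Works for primitive, string-family, and parameterized decimal type names alike.
    batch.cols[entry.getKey()] =
        VectorizedRowBatchCtx.allocateColumnVector(entry.getValue(), VectorizedRowBatch.DEFAULT_SIZE);
  }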
- if (HiveConf.getBoolVar(pctx.getConf(), + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { convertReduceWork((ReduceWork) w); } @@ -322,7 +323,7 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) if (baseWork instanceof MapWork) { convertMapWork((MapWork) baseWork, false); } else if (baseWork instanceof ReduceWork - && HiveConf.getBoolVar(pctx.getConf(), + && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { convertReduceWork((ReduceWork) baseWork); } @@ -393,13 +394,12 @@ private void vectorizeMapWork(MapWork mapWork) throws SemanticException { HashMap nodeOutput = new HashMap(); ogw.startWalking(topNodes, nodeOutput); - Map> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps(); - mapWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps); - Map> allColumnVectorMaps = vnp.getAllColumnVectorMaps(); - mapWork.setAllColumnVectorMaps(allColumnVectorMaps); + mapWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap()); + mapWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap()); + mapWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap()); if (LOG.isDebugEnabled()) { - debugDisplayAllMaps(allColumnVectorMaps, allScratchColumnVectorTypeMaps); + debugDisplayAllMaps(mapWork); } return; @@ -495,7 +495,7 @@ private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException // VectorizationContext... Do we use PreOrderWalker instead of DefaultGraphWalker. Map opRules = new LinkedHashMap(); ReduceWorkVectorizationNodeProcessor vnp = - new ReduceWorkVectorizationNodeProcessor(reduceColumnNames); + new ReduceWorkVectorizationNodeProcessor(reduceColumnNames, reduceTypeInfos); addReduceWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new PreOrderWalker(disp); @@ -510,14 +510,12 @@ private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException // Necessary since we are vectorizing the root operator in reduce. reduceWork.setReducer(vnp.getRootVectorOp()); - Map> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps(); - reduceWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps); - Map> allColumnVectorMaps = vnp.getAllColumnVectorMaps(); - reduceWork.setAllColumnVectorMaps(allColumnVectorMaps); - + reduceWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap()); + reduceWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap()); + reduceWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap()); if (LOG.isDebugEnabled()) { - debugDisplayAllMaps(allColumnVectorMaps, allScratchColumnVectorTypeMaps); + debugDisplayAllMaps(reduceWork); } } } @@ -574,38 +572,34 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // ReduceWorkVectorizationNodeProcessor. class VectorizationNodeProcessor implements NodeProcessor { - // This is used to extract scratch column types for each file key - protected final Map scratchColumnContext = - new HashMap(); + // The vectorization context for the Map or Reduce task. + protected VectorizationContext taskVectorizationContext; - protected final Map, VectorizationContext> vContextsByOp = - new HashMap, VectorizationContext>(); + // The input projection column type name map for the Map or Reduce task. 
+ protected Map taskColumnTypeNameMap; - protected final Set> opsDone = - new HashSet>(); + VectorizationNodeProcessor() { + taskColumnTypeNameMap = new HashMap(); + } - public Map> getAllScratchColumnVectorTypeMaps() { - Map> allScratchColumnVectorTypeMaps = - new HashMap>(); - for (String onefile : scratchColumnContext.keySet()) { - VectorizationContext vc = scratchColumnContext.get(onefile); - Map cmap = vc.getScratchColumnTypeMap(); - allScratchColumnVectorTypeMaps.put(onefile, cmap); - } - return allScratchColumnVectorTypeMaps; + public Map getVectorColumnNameMap() { + return taskVectorizationContext.getProjectionColumnMap(); } - public Map> getAllColumnVectorMaps() { - Map> allColumnVectorMaps = - new HashMap>(); - for(String oneFile: scratchColumnContext.keySet()) { - VectorizationContext vc = scratchColumnContext.get(oneFile); - Map cmap = vc.getProjectionColumnMap(); - allColumnVectorMaps.put(oneFile, cmap); - } - return allColumnVectorMaps; + public Map getVectorColumnTypeMap() { + return taskColumnTypeNameMap; + } + + public Map getVectorScratchColumnTypeMap() { + return taskVectorizationContext.getScratchColumnTypeMap(); } + protected final Set> opsDone = + new HashSet>(); + + protected final Map, Operator> opToVectorOpMap = + new HashMap, Operator>(); + public VectorizationContext walkStackToFindVectorizationContext(Stack stack, Operator op) throws SemanticException { VectorizationContext vContext = null; @@ -622,7 +616,18 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack stac return null; } Operator opParent = (Operator) stack.get(i); - vContext = vContextsByOp.get(opParent); + Operator vectorOpParent = opToVectorOpMap.get(opParent); + if (vectorOpParent != null) { + if (vectorOpParent instanceof VectorizationContextRegion) { + VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOpParent; + vContext = vcRegion.getOuputVectorizationContext(); + LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " has new vectorization context " + vContext.toString()); + } else { + LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " does not have new vectorization context"); + } + } else { + LOG.info("walkStackToFindVectorizationContext " + opParent.getName() + " is not vectorized"); + } --i; } return vContext; @@ -636,14 +641,9 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack stac vectorOp = vectorizeOperator(op, vContext); opsDone.add(op); if (vectorOp != op) { + opToVectorOpMap.put(op, vectorOp); opsDone.add(vectorOp); } - if (vectorOp instanceof VectorizationContextRegion) { - VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; - VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext(); - vContextsByOp.put(op, vOutContext); - scratchColumnContext.put(vOutContext.getFileKey(), vOutContext); - } } } catch (HiveException e) { throw new SemanticException(e); @@ -663,6 +663,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private final MapWork mWork; public MapWorkVectorizationNodeProcessor(MapWork mWork) { + super(); this.mWork = mWork; } @@ -671,41 +672,26 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { Operator op = (Operator) nd; - LOG.info("MapWorkVectorizationNodeProcessor processing Operator: " + op.getName() + "..."); VectorizationContext vContext = null; if (op instanceof TableScanOperator) { - vContext = getVectorizationContext(op, physicalContext); - for (String onefile : mWork.getPathToAliases().keySet()) { - List aliases = mWork.getPathToAliases().get(onefile); - for (String alias : aliases) { - Operator opRoot = mWork.getAliasToWork().get(alias); - if (op == opRoot) { - // The same vectorization context is copied multiple times into - // the MapWork scratch columnMap - // Each partition gets a copy - // - vContext.setFileKey(onefile); - scratchColumnContext.put(onefile, vContext); - if (LOG.isDebugEnabled()) { - LOG.debug("Vectorized MapWork operator " + op.getName() + " vectorization context " + vContext.toString()); - } - break; - } - } + if (taskVectorizationContext == null) { + taskVectorizationContext = getVectorizationContext(op.getSchema(), op.getName(), + taskColumnTypeNameMap); } - vContextsByOp.put(op, vContext); + vContext = taskVectorizationContext; } else { + LOG.info("MapWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName()); vContext = walkStackToFindVectorizationContext(stack, op); if (vContext == null) { - throw new SemanticException( - String.format("Did not find vectorization context for operator %s in operator stack", - op.getName())); + // No operator has "pushed" a new context -- so use the task vectorization context. + vContext = taskVectorizationContext; } } assert vContext != null; + LOG.info("MapWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString()); // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't // vectorize the operators below it. 
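The net effect of the node-processor changes above is a two-step context lookup, condensed below; the method names are the ones used in this patch, while the surrounding scaffolding is illustrative.

  // 1. Walk up the operator stack for a vectorized parent that published a new
  //    output context (a VectorizationContextRegion).
  VectorizationContext vContext = walkStackToFindVectorizationContext(stack, op);
  // 2. Otherwise fall back to the single task-level context created for the
  //    TableScan (map side) or the reduce shuffle (reduce side).
  if (vContext == null) {
    vContext = taskVectorizationContext;
  }
  Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);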
@@ -720,9 +706,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Operator vectorOp = doVectorize(op, vContext); if (LOG.isDebugEnabled()) { - LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString()); if (vectorOp instanceof VectorizationContextRegion) { - LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vContext.toString()); + VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; + VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext(); + LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString()); } } @@ -733,8 +720,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor { private final List reduceColumnNames; - - private VectorizationContext reduceShuffleVectorizationContext; + private final List reduceTypeInfos; private Operator rootVectorOp; @@ -742,10 +728,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return rootVectorOp; } - public ReduceWorkVectorizationNodeProcessor(List reduceColumnNames) { + public ReduceWorkVectorizationNodeProcessor(List reduceColumnNames, + List reduceTypeInfos) { + super(); this.reduceColumnNames = reduceColumnNames; + this.reduceTypeInfos = reduceTypeInfos; rootVectorOp = null; - reduceShuffleVectorizationContext = null; } @Override @@ -753,8 +741,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { Operator op = (Operator) nd; - LOG.info("ReduceWorkVectorizationNodeProcessor processing Operator: " + - op.getName() + "..."); VectorizationContext vContext = null; @@ -763,25 +749,30 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (op.getParentOperators().size() == 0) { LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + reduceColumnNames.toString()); - vContext = new VectorizationContext(reduceColumnNames); - vContext.setFileKey("_REDUCE_SHUFFLE_"); - scratchColumnContext.put("_REDUCE_SHUFFLE_", vContext); - reduceShuffleVectorizationContext = vContext; + vContext = new VectorizationContext("__Reduce_Shuffle__", reduceColumnNames); + taskVectorizationContext = vContext; + int i = 0; + for (TypeInfo typeInfo : reduceTypeInfos) { + taskColumnTypeNameMap.put(i, typeInfo.getTypeName()); + i++; + } saveRootVectorOp = true; if (LOG.isDebugEnabled()) { LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context " + vContext.toString()); } } else { + LOG.info("ReduceWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName()); vContext = walkStackToFindVectorizationContext(stack, op); if (vContext == null) { // If we didn't find a context among the operators, assume the top -- reduce shuffle's // vectorization context. - vContext = reduceShuffleVectorizationContext; + vContext = taskVectorizationContext; } } assert vContext != null; + LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString()); // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't // vectorize the operators below it. 
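On the reduce side, the shuffle context is now created under the name "__Reduce_Shuffle__" and the column type map is captured from the reduce TypeInfos at the same time, instead of being keyed by the old "_REDUCE_SHUFFLE_" file key. A purely hypothetical illustration of the flat per-task maps this produces for a two-column reduce (column names and types invented for clarity):

  Map<String, Integer> vectorColumnNameMap = new HashMap<String, Integer>();
  vectorColumnNameMap.put("_col0", 0);   // reduce key
  vectorColumnNameMap.put("_col1", 1);   // reduce value
  Map<Integer, String> vectorColumnTypeMap = new HashMap<Integer, String>();
  vectorColumnTypeMap.put(0, "int");
  vectorColumnTypeMap.put(1, "string");
  // Scratch columns allocated while vectorizing expressions land in a third map,
  // e.g. vectorScratchColumnTypeMap = { 2 -> "bigint" }.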
@@ -796,9 +787,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Operator vectorOp = doVectorize(op, vContext); if (LOG.isDebugEnabled()) { - LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString()); if (vectorOp instanceof VectorizationContextRegion) { - LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vContext.toString()); + VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; + VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext(); + LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString()); } } if (vectorOp instanceof VectorGroupByOperator) { @@ -816,7 +808,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private static class ValidatorVectorizationContext extends VectorizationContext { private ValidatorVectorizationContext() { - super(); + super("No Name"); } @Override @@ -831,25 +823,27 @@ protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) { } @Override - public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { - this.physicalContext = pctx; - boolean vectorPath = HiveConf.getBoolVar(pctx.getConf(), + public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException { + this.physicalContext = physicalContext; + hiveConf = physicalContext.getConf(); + + boolean vectorPath = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); if (!vectorPath) { LOG.info("Vectorization is disabled"); - return pctx; + return physicalContext; } // create dispatcher and graph walker - Dispatcher disp = new VectorizationDispatcher(pctx); + Dispatcher disp = new VectorizationDispatcher(physicalContext); TaskGraphWalker ogw = new TaskGraphWalker(disp); // get all the tasks nodes from root task ArrayList topNodes = new ArrayList(); - topNodes.addAll(pctx.getRootTasks()); + topNodes.addAll(physicalContext.getRootTasks()); // begin to walk through the task tree. ogw.startWalking(topNodes, null); - return pctx; + return physicalContext; } boolean validateMapWorkOperator(Operator op, MapWork mWork, boolean isTez) { @@ -901,7 +895,7 @@ boolean validateReduceWorkOperator(Operator op) { } break; case GROUPBY: - if (HiveConf.getBoolVar(physicalContext.getConf(), + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) { ret = validateGroupByOperator((GroupByOperator) op, true, true); } else { @@ -1262,20 +1256,24 @@ private boolean validateDataType(String type) { return supportedDataTypesPattern.matcher(type.toLowerCase()).matches(); } - private VectorizationContext getVectorizationContext(Operator op, - PhysicalContext pctx) { - RowSchema rs = op.getSchema(); + private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName, + Map typeNameMap) { + + VectorizationContext vContext = new VectorizationContext(contextName); // Add all non-virtual columns to make a vectorization context for // the TableScan operator. - VectorizationContext vContext = new VectorizationContext(); - for (ColumnInfo c : rs.getSignature()) { + int i = 0; + for (ColumnInfo c : rowSchema.getSignature()) { // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560). 
if (!isVirtualColumn(c)) { vContext.addInitialColumn(c.getInternalName()); + typeNameMap.put(i, c.getTypeName()); + i++; } } vContext.finishedAddingInitialColumns(); + return vContext; } @@ -1333,40 +1331,14 @@ private boolean isVirtualColumn(ColumnInfo column) { return false; } - public void debugDisplayAllMaps(Map> allColumnVectorMaps, - Map> allScratchColumnVectorTypeMaps) { - - // Context keys grow in length since they are a path... - Comparator comparerShorterString = new Comparator() { - @Override - public int compare(String o1, String o2) { - Integer length1 = o1.length(); - Integer length2 = o2.length(); - return length1.compareTo(length2); - }}; - - Comparator comparerInteger = new Comparator() { - @Override - public int compare(Integer o1, Integer o2) { - return o1.compareTo(o2); - }}; - - Map> sortedAllColumnVectorMaps = new TreeMap>(comparerShorterString); - for (Map.Entry> entry : allColumnVectorMaps.entrySet()) { - Map sortedColumnMap = new TreeMap(comparerInteger); - for (Map.Entry innerEntry : entry.getValue().entrySet()) { - sortedColumnMap.put(innerEntry.getValue(), innerEntry.getKey()); - } - sortedAllColumnVectorMaps.put(entry.getKey(), sortedColumnMap); - } - LOG.debug("sortedAllColumnVectorMaps " + sortedAllColumnVectorMaps); + public void debugDisplayAllMaps(BaseWork work) { - Map> sortedAllScratchColumnVectorTypeMap = new TreeMap>(comparerShorterString); - for (Map.Entry> entry : allScratchColumnVectorTypeMaps.entrySet()) { - Map sortedScratchColumnTypeMap = new TreeMap(comparerInteger); - sortedScratchColumnTypeMap.putAll(entry.getValue()); - sortedAllScratchColumnVectorTypeMap.put(entry.getKey(), sortedScratchColumnTypeMap); - } - LOG.debug("sortedAllScratchColumnVectorTypeMap " + sortedAllScratchColumnVectorTypeMap); + Map columnNameMap = work.getVectorColumnNameMap(); + Map columnTypeMap = work.getVectorColumnTypeMap(); + Map scratchColumnTypeMap = work.getVectorScratchColumnTypeMap(); + + LOG.debug("debugDisplayAllMaps columnNameMap " + columnNameMap.toString()); + LOG.debug("debugDisplayAllMaps columnTypeMap " + columnTypeMap.toString()); + LOG.debug("debugDisplayAllMaps scratchColumnTypeMap " + scratchColumnTypeMap.toString()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 0e449b6..4f9221e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -60,8 +60,9 @@ public BaseWork(String name) { private String name; // Vectorization. 
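The three flat maps below replace the old per-file nested maps on BaseWork; after vectorizing a task, the node processor hands its maps over in one step (taken from the planner changes earlier in this patch, with "work" standing in for the MapWork or ReduceWork at hand):

  work.setVectorColumnNameMap(vnp.getVectorColumnNameMap());
  work.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap());
  work.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap());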
- protected Map> allScratchColumnVectorTypeMaps = null; - protected Map> allColumnVectorMaps = null; + protected Map vectorColumnNameMap; + protected Map vectorColumnTypeMap; + protected Map vectorScratchColumnTypeMap; public void setGatheringStats(boolean gatherStats) { this.gatheringStats = gatherStats; @@ -143,21 +144,28 @@ public void addDummyOp(HashTableDummyOperator dummyOp) { return returnSet; } - public Map> getAllScratchColumnVectorTypeMaps() { - return allScratchColumnVectorTypeMaps; + public Map getVectorColumnNameMap() { + return vectorColumnNameMap; } - public void setAllScratchColumnVectorTypeMaps( - Map> allScratchColumnVectorTypeMaps) { - this.allScratchColumnVectorTypeMaps = allScratchColumnVectorTypeMaps; + public void setVectorColumnNameMap(Map vectorColumnNameMap) { + this.vectorColumnNameMap = vectorColumnNameMap; } - public Map> getAllColumnVectorMaps() { - return allColumnVectorMaps; + public Map getVectorColumnTypeMap() { + return vectorColumnTypeMap; } - public void setAllColumnVectorMaps(Map> allColumnVectorMaps) { - this.allColumnVectorMaps = allColumnVectorMaps; + public void setVectorColumnTypeMap(Map vectorColumnTypeMap) { + this.vectorColumnTypeMap = vectorColumnTypeMap; + } + + public Map getVectorScratchColumnTypeMap() { + return vectorScratchColumnTypeMap; + } + + public void setVectorScratchColumnTypeMap(Map vectorScratchColumnTypeMap) { + this.vectorScratchColumnTypeMap = vectorScratchColumnTypeMap; } /** diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java new file mode 100644 index 0000000..6f5d1a0 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java @@ -0,0 +1,405 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hive.common.util.DateUtils; + +/** + * Generate object inspector and random row object[]. 
+ */ +public class RandomRowObjectSource { + + private Random r; + + private int columnCount; + + private List typeNames; + + private PrimitiveCategory[] primitiveCategories; + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private List primitiveObjectInspectorList; + + private StructObjectInspector rowStructObjectInspector; + + public List typeNames() { + return typeNames; + } + + public PrimitiveCategory[] primitiveCategories() { + return primitiveCategories; + } + + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + public StructObjectInspector rowStructObjectInspector() { + return rowStructObjectInspector; + } + + public void init(Random r) { + this.r = r; + chooseSchema(); + } + + private static String[] possibleHiveTypeNames = { + "boolean", + "tinyint", + "smallint", + "int", + "bigint", + "date", + "float", + "double", + "string", + "char", + "varchar", + "binary", + "date", + "timestamp", + serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME, + serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME, + "decimal" + }; + + private void chooseSchema() { + columnCount = 1 + r.nextInt(20); + typeNames = new ArrayList(columnCount); + primitiveCategories = new PrimitiveCategory[columnCount]; + primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + primitiveObjectInspectorList = new ArrayList(columnCount); + List columnNames = new ArrayList(columnCount); + for (int c = 0; c < columnCount; c++) { + columnNames.add(String.format("col%d", c)); + int typeNum = r.nextInt(possibleHiveTypeNames.length); + String typeName = possibleHiveTypeNames[typeNum]; + if (typeName.equals("char")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("char(%d)", maxLength); + } else if (typeName.equals("varchar")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("varchar(%d)", maxLength); + } else if (typeName.equals("decimal")) { + typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + } + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + primitiveTypeInfos[c] = primitiveTypeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveCategories[c] = primitiveCategory; + primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo)); + typeNames.add(typeName); + } + rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); + } + + public Object[][] randomRows(int n) { + Object[][] result = new Object[n][]; + for (int i = 0; i < n; i++) { + result[i] = randomRow(); + } + return result; + } + + public Object[] randomRow() { + Object row[] = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + Object object = randomObject(c); + if (object == null) { + throw new Error("Unexpected null for column " + c); + } + row[c] = getWritableObject(c, object); + if (row[c] == null) { + throw new Error("Unexpected null for writable for column " + c); + } + } + return row; + } + + public Object getWritableObject(int column, Object object) { + ObjectInspector objectInspector = primitiveObjectInspectorList.get(column); + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return ((WritableBooleanObjectInspector) 
objectInspector).create((boolean) object); + case BYTE: + return ((WritableByteObjectInspector) objectInspector).create((byte) object); + case SHORT: + return ((WritableShortObjectInspector) objectInspector).create((short) object); + case INT: + return ((WritableIntObjectInspector) objectInspector).create((int) object); + case LONG: + return ((WritableLongObjectInspector) objectInspector).create((long) object); + case DATE: + return ((WritableDateObjectInspector) objectInspector).create((Date) object); + case FLOAT: + return ((WritableFloatObjectInspector) objectInspector).create((float) object); + case DOUBLE: + return ((WritableDoubleObjectInspector) objectInspector).create((double) object); + case STRING: + return ((WritableStringObjectInspector) objectInspector).create((String) object); + case CHAR: + { + WritableHiveCharObjectInspector writableCharObjectInspector = + new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); + return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + } + case VARCHAR: + { + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = + new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); + return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + } + case BINARY: + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); + case TIMESTAMP: + return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0)); + case INTERVAL_YEAR_MONTH: + return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0)); + case INTERVAL_DAY_TIME: + return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0)); + case DECIMAL: + { + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = + new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); + return writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public Object randomObject(int column) { + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return Boolean.valueOf(r.nextInt(1) == 1); + case BYTE: + return Byte.valueOf((byte) r.nextInt()); + case SHORT: + return Short.valueOf((short) r.nextInt()); + case INT: + return Integer.valueOf(r.nextInt()); + case LONG: + return Long.valueOf(r.nextLong()); + case DATE: + return getRandDate(r); + case FLOAT: + return Float.valueOf(r.nextFloat() * 10 - 5); + case DOUBLE: + return Double.valueOf(r.nextDouble() * 10 - 5); + case STRING: + return getRandString(r); + case CHAR: + return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); + case VARCHAR: + return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); + case BINARY: + return getRandBinary(r, 1 + r.nextInt(100)); + case TIMESTAMP: + return getRandTimestamp(r); + case INTERVAL_YEAR_MONTH: + return getRandIntervalYearMonth(r); + case INTERVAL_DAY_TIME: + return getRandIntervalDayTime(r); + case DECIMAL: + return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public static String getRandString(Random r) { + return getRandString(r, null, r.nextInt(10)); + } + + public static String 
getRandString(Random r, String characters, int length) { + if (characters == null) { + characters = "ABCDEFGHIJKLMabcdefghijklm"; + } + StringBuilder sb = new StringBuilder(); + sb.append(""); + for (int i = 0; i < length; i++) { + if (characters == null) { + sb.append((char) (r.nextInt(128))); + } else { + sb.append(characters.charAt(r.nextInt(characters.length()))); + } + } + return sb.toString(); + } + + public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { + int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); + String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveChar hiveChar = new HiveChar(randomString, maxLength); + return hiveChar; + } + + public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) { + int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength()); + String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength); + return hiveVarchar; + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) { + while (true) { + StringBuilder sb = new StringBuilder(); + int precision = 1 + r.nextInt(18); + int scale = 0 + r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + if (bd.scale() > bd.precision()) { + // Sometimes weird decimals are produced? + continue; + } + + return bd; + } + } + + public static Date getRandDate(Random r) { + String dateStr = String.format("%d-%02d-%02d", + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28))); // day + Date dateVal = Date.valueOf(dateStr); + return dateVal; + } + + public static Timestamp getRandTimestamp(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String timestampStr = String.format("%d-%02d-%02d %02d:%02d:%02d%s", + Integer.valueOf(1970 + r.nextInt(200)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + Timestamp timestampVal = Timestamp.valueOf(timestampStr); + return timestampVal; + } + + public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { + String yearMonthSignStr = r.nextInt(2) == 0 ? 
"" : "-"; + String intervalYearMonthStr = String.format("%s%d-%d", + yearMonthSignStr, + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(0 + r.nextInt(12))); // month + HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); + TestCase.assertTrue(intervalYearMonthVal != null); + return intervalYearMonthVal; + } + + public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", + yearMonthSignStr, + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); + TestCase.assertTrue(intervalDayTimeVal != null); + return intervalDayTimeVal; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java index 8ccd5f2..3968c50 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java @@ -88,7 +88,7 @@ private VectorFilterOperator getAVectorFilterOperator() throws HiveException { ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false); List columns = new ArrayList(); columns.add("col1"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); FilterDesc fdesc = new FilterDesc(); fdesc.setPredicate(col1Expr); return new VectorFilterOperator(vc, fdesc); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index f5ec7a7..8c84f30 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -173,7 +173,7 @@ public void testMemoryPressureFlush() throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("Key"); mapColumnNames.add("Value"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildKeyGroupByDesc (ctx, "max", "Value", TypeInfoFactory.longTypeInfo, @@ -1710,7 +1710,7 @@ private void testMultiKey( mapColumnNames.put("value", i); outputColumnNames.add("value"); - VectorizationContext ctx = new VectorizationContext(outputColumnNames); + VectorizationContext ctx = new VectorizationContext("name", outputColumnNames); ArrayList aggs = new ArrayList(1); aggs.add( @@ -1821,7 +1821,7 @@ private void testKeyTypeAggregate( List mapColumnNames = new ArrayList(); mapColumnNames.add("Key"); mapColumnNames.add("Value"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); Set keys = new HashSet(); AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, @@ -2235,7 +2235,7 @@ public void testAggregateCountStarIterable ( Object expected) 
throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescCountStar (ctx); @@ -2264,7 +2264,7 @@ public void testAggregateCountReduceIterable ( Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); @@ -2296,7 +2296,7 @@ public void testAggregateStringIterable ( Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.stringTypeInfo); @@ -2322,11 +2322,12 @@ public void testAggregateStringIterable ( } public void testAggregateDecimalIterable ( -String aggregateName, Iterable data, - Object expected) throws HiveException { - List mapColumnNames = new ArrayList(); - mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + String aggregateName, + Iterable data, + Object expected) throws HiveException { + List mapColumnNames = new ArrayList(); + mapColumnNames.add("A"); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4)); @@ -2358,7 +2359,7 @@ public void testAggregateDoubleIterable ( Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A", TypeInfoFactory.doubleTypeInfo); @@ -2389,7 +2390,7 @@ public void testAggregateLongIterable ( Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.longTypeInfo); @@ -2420,7 +2421,7 @@ public void testAggregateLongKeyIterable ( List mapColumnNames = new ArrayList(); mapColumnNames.add("Key"); mapColumnNames.add("Value"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); Set keys = new HashSet(); @@ -2487,7 +2488,7 @@ public void testAggregateStringKeyIterable ( List mapColumnNames = new ArrayList(); mapColumnNames.add("Key"); mapColumnNames.add("Value"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); Set keys = new HashSet(); GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value", diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java new file mode 100644 index 0000000..0f8712e --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.HashMap; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; + +import junit.framework.TestCase; + +/** + * Unit test for the vectorized conversion to and from row object[]. + */ +public class TestVectorRowObject extends TestCase { + + void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtractRow, + Object[][] randomRows, int firstRandomRowIndex ) { + + int rowSize = vectorExtractRow.getCount(); + Object[] row = new Object[rowSize]; + for (int i = 0; i < batch.size; i++) { + vectorExtractRow.extractRow(i, row); + Object[] expectedRow = randomRows[firstRandomRowIndex + i]; + for (int c = 0; c < rowSize; c++) { + if (!row[c].equals(expectedRow[c])) { + fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch"); + } + } + } + } + + void testVectorRowObject(int caseNum, Random r) throws HiveException { + + Map emptyScratchMap = new HashMap(); + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); + + VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); + batchContext.init(emptyScratchMap, source.rowStructObjectInspector()); + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch(); + vectorAssignRow.init(source.typeNames()); + vectorAssignRow.setOneBatch(batch); + + VectorExtractRowSameBatch vectorExtractRow = new VectorExtractRowSameBatch(); + vectorExtractRow.init(source.typeNames()); + vectorExtractRow.setOneBatch(batch); + + Object[][] randomRows = source.randomRows(100000); + int firstRandomRowIndex = 0; + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + + vectorAssignRow.assignRow(batch.size, row); + batch.size++; + if (batch.size == batch.DEFAULT_SIZE) { + examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); + firstRandomRowIndex = i + 1; + batch.reset(); + } + } + if (batch.size > 0) { + examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); + } + } + + public void testVectorRowObject() throws Throwable { + + try { + Random r = new Random(5678); + for (int c = 0; c < 10; c++) { + testVectorRowObject(c, r); + } + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file diff --git 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index b482029..59961c5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -88,7 +88,7 @@ public void testSelectOperator() throws HiveException { columns.add("a"); columns.add("b"); columns.add("c"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); SelectDesc selDesc = new SelectDesc(false); List colList = new ArrayList(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java new file mode 100644 index 0000000..23e44f0 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java @@ -0,0 +1,658 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.io.BooleanWritable; +import 
org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +import junit.framework.TestCase; + +/** + * Unit test for the vectorized serialize and deserialize row. + */ +public class TestVectorSerDeRow extends TestCase { + + public static enum SerializationType { + NONE, + BINARY_SORTABLE, + LAZY_BINARY, + LAZY_SIMPLE + } + + void deserializeAndVerify(Output output, DeserializeRead deserializeRead, + RandomRowObjectSource source, Object[] expectedRow) + throws HiveException, IOException { + deserializeRead.set(output.getData(), 0, output.getLength()); + PrimitiveCategory[] primitiveCategories = source.primitiveCategories(); + for (int i = 0; i < primitiveCategories.length; i++) { + Object expected = expectedRow[i]; + PrimitiveCategory primitiveCategory = primitiveCategories[i]; + PrimitiveTypeInfo primitiveTypeInfo = source.primitiveTypeInfos()[i]; + if (deserializeRead.readCheckNull()) { + throw new HiveException("Unexpected NULL"); + } + switch (primitiveCategory) { + case BOOLEAN: + { + Boolean value = deserializeRead.readBoolean(); + BooleanWritable expectedWritable = (BooleanWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case BYTE: + { + Byte value = deserializeRead.readByte(); + ByteWritable expectedWritable = (ByteWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")"); + } + } + break; + case SHORT: + { + Short value = deserializeRead.readShort(); + ShortWritable expectedWritable = (ShortWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case INT: + { + Integer value = deserializeRead.readInt(); + IntWritable expectedWritable = (IntWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case LONG: + { + Long value = deserializeRead.readLong(); + LongWritable expectedWritable = (LongWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case DATE: + { + DeserializeRead.ReadDateResults readDateResults = deserializeRead.createReadDateResults(); + deserializeRead.readDate(readDateResults); + Date value = readDateResults.getDate(); + DateWritable expectedWritable = (DateWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case FLOAT: + { + Float value = deserializeRead.readFloat(); + FloatWritable expectedWritable = (FloatWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case DOUBLE: + { + Double value = deserializeRead.readDouble(); + DoubleWritable expectedWritable = (DoubleWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case STRING: + { + 
DeserializeRead.ReadStringResults readStringResults = deserializeRead.createReadStringResults(); + deserializeRead.readString(readStringResults); + + char[] charsBuffer = new char[readStringResults.bytes.length]; + for (int c = 0; c < charsBuffer.length; c++) { + charsBuffer[c] = (char) (readStringResults.bytes[c] & 0xFF); + } + + byte[] stringBytes = Arrays.copyOfRange(readStringResults.bytes, readStringResults.start, readStringResults.start + readStringResults.length); + + char[] charsRange = new char[stringBytes.length]; + for (int c = 0; c < charsRange.length; c++) { + charsRange[c] = (char) (stringBytes[c] & 0xFF); + } + + Text text = new Text(stringBytes); + String value = text.toString(); + Text expectedWritable = (Text) expected; + if (!value.equals(expectedWritable.toString())) { + TestCase.fail("String field mismatch (expected '" + expectedWritable.toString() + "' found '" + value + "')"); + } + } + break; + case CHAR: + { + DeserializeRead.ReadHiveCharResults readHiveCharResults = deserializeRead.createReadHiveCharResults(); + deserializeRead.readHiveChar(readHiveCharResults); + HiveChar hiveChar = readHiveCharResults.getHiveChar(); + HiveCharWritable expectedWritable = (HiveCharWritable) expected; + if (!hiveChar.equals(expectedWritable.getHiveChar())) { + TestCase.fail("Char field mismatch (expected '" + expectedWritable.getHiveChar() + "' found '" + hiveChar + "')"); + } + } + break; + case VARCHAR: + { + DeserializeRead.ReadHiveVarcharResults readHiveVarcharResults = deserializeRead.createReadHiveVarcharResults(); + deserializeRead.readHiveVarchar(readHiveVarcharResults); + HiveVarchar hiveVarchar = readHiveVarcharResults.getHiveVarchar(); + HiveVarcharWritable expectedWritable = (HiveVarcharWritable) expected; + if (!hiveVarchar.equals(expectedWritable.getHiveVarchar())) { + TestCase.fail("Varchar field mismatch (expected '" + expectedWritable.getHiveVarchar() + "' found '" + hiveVarchar + "')"); + } + } + break; + case DECIMAL: + { + DeserializeRead.ReadDecimalResults readDecimalResults = deserializeRead.createReadDecimalResults(); + deserializeRead.readHiveDecimal(readDecimalResults); + HiveDecimal value = readDecimalResults.getHiveDecimal(); + if (value == null) { + TestCase.fail("Decimal field evaluated to NULL"); + } + HiveDecimalWritable expectedWritable = (HiveDecimalWritable) expected; + if (!value.equals(expectedWritable.getHiveDecimal())) { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; + int precision = decimalTypeInfo.getPrecision(); + int scale = decimalTypeInfo.getScale(); + TestCase.fail("Decimal field mismatch (expected " + expectedWritable.getHiveDecimal() + " found " + value.toString() + ") precision " + precision + ", scale " + scale); + } + } + break; + case TIMESTAMP: + { + DeserializeRead.ReadTimestampResults readTimestampResults = deserializeRead.createReadTimestampResults(); + deserializeRead.readTimestamp(readTimestampResults); + Timestamp value = readTimestampResults.getTimestamp(); + TimestampWritable expectedWritable = (TimestampWritable) expected; + if (!value.equals(expectedWritable.getTimestamp())) { + TestCase.fail("Timestamp field mismatch (expected " + expectedWritable.getTimestamp() + " found " + value.toString() + ")"); + } + } + break; + case INTERVAL_YEAR_MONTH: + { + DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); + deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); + HiveIntervalYearMonth value = 
readIntervalYearMonthResults.getHiveIntervalYearMonth(); + HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) expected; + HiveIntervalYearMonth expectedValue = expectedWritable.getHiveIntervalYearMonth(); + if (!value.equals(expectedValue)) { + TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expectedValue + " found " + value.toString() + ")"); + } + } + break; + case INTERVAL_DAY_TIME: + { + DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); + deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); + HiveIntervalDayTime value = readIntervalDayTimeResults.getHiveIntervalDayTime(); + HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) expected; + HiveIntervalDayTime expectedValue = expectedWritable.getHiveIntervalDayTime(); + if (!value.equals(expectedValue)) { + TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expectedValue + " found " + value.toString() + ")"); + } + } + break; + case BINARY: + { + DeserializeRead.ReadBinaryResults readBinaryResults = deserializeRead.createReadBinaryResults(); + deserializeRead.readBinary(readBinaryResults); + byte[] byteArray = Arrays.copyOfRange(readBinaryResults.bytes, readBinaryResults.start, readBinaryResults.start + readBinaryResults.length); + BytesWritable expectedWritable = (BytesWritable) expected; + if (byteArray.length != expectedWritable.getLength()){ + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); + } + byte[] expectedBytes = expectedWritable.getBytes(); + for (int b = 0; b < byteArray.length; b++) { + if (byteArray[b] != expectedBytes[b]) { + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); + } + } + } + break; + default: + throw new HiveException("Unexpected primitive category " + primitiveCategory); + } + } + deserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!deserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!deserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!deserializeRead.bufferRangeHasExtraDataWarned()); + } + + void serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow, + DeserializeRead deserializeRead, RandomRowObjectSource source, Object[][] randomRows, + int firstRandomRowIndex) throws HiveException, IOException { + + Output output = new Output(); + for (int i = 0; i < batch.size; i++) { + output.reset(); + vectorSerializeRow.setOutput(output); + vectorSerializeRow.serializeWrite(batch, i); + Object[] expectedRow = randomRows[firstRandomRowIndex + i]; + + byte[] bytes = output.getData(); + int length = output.getLength(); + char[] chars = new char[length]; + for (int c = 0; c < chars.length; c++) { + chars[c] = (char) (bytes[c] & 0xFF); + } + + deserializeAndVerify(output, deserializeRead, source, expectedRow); + } + } + + void testVectorSerializeRow(int caseNum, Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException { + + Map emptyScratchMap = new HashMap(); + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); + + VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); + batchContext.init(emptyScratchMap, source.rowStructObjectInspector()); + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch(); + 
vectorAssignRow.init(source.typeNames()); + vectorAssignRow.setOneBatch(batch); + + int fieldCount = source.typeNames().size(); + DeserializeRead deserializeRead; + SerializeWrite serializeWrite; + switch (serializationType) { + case BINARY_SORTABLE: + deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos()); + serializeWrite = new BinarySortableSerializeWrite(fieldCount); + break; + case LAZY_BINARY: + deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos()); + serializeWrite = new LazyBinarySerializeWrite(fieldCount); + break; + case LAZY_SIMPLE: + { + StructObjectInspector rowObjectInspector = source.rowStructObjectInspector(); + LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector); + byte separator = (byte) '\t'; + deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), + separator, lazySerDeParams); + serializeWrite = new LazySimpleSerializeWrite(fieldCount, + separator, lazySerDeParams); + } + break; + default: + throw new Error("Unknown serialization type " + serializationType); + } + VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite); + vectorSerializeRow.init(source.typeNames()); + + Object[][] randomRows = source.randomRows(100000); + int firstRandomRowIndex = 0; + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + + vectorAssignRow.assignRow(batch.size, row); + batch.size++; + if (batch.size == batch.DEFAULT_SIZE) { + serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex); + firstRandomRowIndex = i + 1; + batch.reset(); + } + } + if (batch.size > 0) { + serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex); + } + } + + void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtractRow, + Object[][] randomRows, int firstRandomRowIndex ) { + + int rowSize = vectorExtractRow.getCount(); + Object[] row = new Object[rowSize]; + for (int i = 0; i < batch.size; i++) { + vectorExtractRow.extractRow(i, row); + + Object[] expectedRow = randomRows[firstRandomRowIndex + i]; + + for (int c = 0; c < rowSize; c++) { + if (row[c] == null) { + fail("Unexpected NULL from extractRow"); + } + if (!row[c].equals(expectedRow[c])) { + fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch"); + } + } + } + } + + private Output serializeRow(Object[] row, RandomRowObjectSource source, SerializeWrite serializeWrite) throws HiveException, IOException { + Output output = new Output(); + serializeWrite.set(output); + PrimitiveCategory[] primitiveCategories = source.primitiveCategories(); + for (int i = 0; i < primitiveCategories.length; i++) { + Object object = row[i]; + PrimitiveCategory primitiveCategory = primitiveCategories[i]; + switch (primitiveCategory) { + case BOOLEAN: + { + BooleanWritable expectedWritable = (BooleanWritable) object; + boolean value = expectedWritable.get(); + serializeWrite.writeBoolean(value); + } + break; + case BYTE: + { + ByteWritable expectedWritable = (ByteWritable) object; + byte value = expectedWritable.get(); + serializeWrite.writeByte(value); + } + break; + case SHORT: + { + ShortWritable expectedWritable = (ShortWritable) object; + short value = expectedWritable.get(); + serializeWrite.writeShort(value); + } + break; + case INT: + { + IntWritable expectedWritable = (IntWritable) object; + int value = expectedWritable.get(); + serializeWrite.writeInt(value); + } + break; + case LONG: + { + LongWritable 
expectedWritable = (LongWritable) object; + long value = expectedWritable.get(); + serializeWrite.writeLong(value); + } + break; + case DATE: + { + DateWritable expectedWritable = (DateWritable) object; + Date value = expectedWritable.get(); + serializeWrite.writeDate(value); + } + break; + case FLOAT: + { + FloatWritable expectedWritable = (FloatWritable) object; + float value = expectedWritable.get(); + serializeWrite.writeFloat(value); + } + break; + case DOUBLE: + { + DoubleWritable expectedWritable = (DoubleWritable) object; + double value = expectedWritable.get(); + serializeWrite.writeDouble(value); + } + break; + case STRING: + { + Text text = (Text) object; + serializeWrite.writeString(text.getBytes(), 0, text.getLength()); + } + break; + case CHAR: + { + HiveCharWritable expectedWritable = (HiveCharWritable) object; + HiveChar value = expectedWritable.getHiveChar(); + serializeWrite.writeHiveChar(value); + } + break; + case VARCHAR: + { + HiveVarcharWritable expectedWritable = (HiveVarcharWritable) object; + HiveVarchar value = expectedWritable.getHiveVarchar(); + serializeWrite.writeHiveVarchar(value); + } + break; + case BINARY: + { + BytesWritable expectedWritable = (BytesWritable) object; + byte[] bytes = expectedWritable.getBytes(); + int length = expectedWritable.getLength(); + serializeWrite.writeBinary(bytes, 0, length); + } + break; + case TIMESTAMP: + { + TimestampWritable expectedWritable = (TimestampWritable) object; + Timestamp value = expectedWritable.getTimestamp(); + serializeWrite.writeTimestamp(value); + } + break; + case INTERVAL_YEAR_MONTH: + { + HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) object; + HiveIntervalYearMonth value = expectedWritable.getHiveIntervalYearMonth(); + serializeWrite.writeHiveIntervalYearMonth(value); + } + break; + case INTERVAL_DAY_TIME: + { + HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) object; + HiveIntervalDayTime value = expectedWritable.getHiveIntervalDayTime(); + serializeWrite.writeHiveIntervalDayTime(value); + } + break; + case DECIMAL: + { + HiveDecimalWritable expectedWritable = (HiveDecimalWritable) object; + HiveDecimal value = expectedWritable.getHiveDecimal(); + serializeWrite.writeHiveDecimal(value); + } + break; + default: + throw new HiveException("Unexpected primitive category " + primitiveCategory); + } + } + return output; + } + + private Properties createProperties(String fieldNames, String fieldTypes) { + Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); + + tbl.setProperty("columns", fieldNames); + tbl.setProperty("columns.types", fieldTypes); + + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + + return tbl; + } + + private LazySerDeParameters getSerDeParams(StructObjectInspector rowObjectInspector) throws SerDeException { + String fieldNames = ObjectInspectorUtils.getFieldNames(rowObjectInspector); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowObjectInspector); + Configuration conf = new Configuration(); + Properties tbl = createProperties(fieldNames, fieldTypes); + return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); + } + + void testVectorDeserializeRow(int caseNum, Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException { + + Map emptyScratchMap = new HashMap(); + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); + + 
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); + batchContext.init(emptyScratchMap, source.rowStructObjectInspector()); + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + int fieldCount = source.typeNames().size(); + DeserializeRead deserializeRead; + SerializeWrite serializeWrite; + switch (serializationType) { + case BINARY_SORTABLE: + deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos()); + serializeWrite = new BinarySortableSerializeWrite(fieldCount); + break; + case LAZY_BINARY: + deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos()); + serializeWrite = new LazyBinarySerializeWrite(fieldCount); + break; + case LAZY_SIMPLE: + { + StructObjectInspector rowObjectInspector = source.rowStructObjectInspector(); + LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector); + byte separator = (byte) '\t'; + deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), + separator, lazySerDeParams); + serializeWrite = new LazySimpleSerializeWrite(fieldCount, + separator, lazySerDeParams); + } + break; + default: + throw new Error("Unknown serialization type " + serializationType); + } + VectorDeserializeRow vectorDeserializeRow = new VectorDeserializeRow(deserializeRead); + vectorDeserializeRow.init(); + + VectorExtractRowSameBatch vectorExtractRow = new VectorExtractRowSameBatch(); + vectorExtractRow.init(source.typeNames()); + vectorExtractRow.setOneBatch(batch); + + Object[][] randomRows = source.randomRows(100000); + int firstRandomRowIndex = 0; + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + + Output output = serializeRow(row, source, serializeWrite); + vectorDeserializeRow.setBytes(output.getData(), 0, output.getLength()); + vectorDeserializeRow.deserializeByValue(batch, batch.size); + batch.size++; + if (batch.size == batch.DEFAULT_SIZE) { + examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); + firstRandomRowIndex = i + 1; + batch.reset(); + } + } + if (batch.size > 0) { + examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); + } + } + + public void testVectorSerDeRow() throws Throwable { + + try { + Random r = new Random(5678); + for (int c = 0; c < 10; c++) { + testVectorSerializeRow(c, r, SerializationType.BINARY_SORTABLE); + } + for (int c = 0; c < 10; c++) { + testVectorSerializeRow(c, r, SerializationType.LAZY_BINARY); + } + for (int c = 0; c < 10; c++) { + testVectorSerializeRow(c, r, SerializationType.LAZY_SIMPLE); + } + + for (int c = 0; c < 10; c++) { + testVectorDeserializeRow(c, r, SerializationType.BINARY_SORTABLE); + } + for (int c = 0; c < 10; c++) { + testVectorDeserializeRow(c, r, SerializationType.LAZY_BINARY); + } + for (int c = 0; c < 10; c++) { + testVectorDeserializeRow(c, r, SerializationType.LAZY_SIMPLE); + } + + + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index becebd4..1a77033 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -247,7 +247,7 @@ public void testArithmeticExpressionVectorization() throws HiveException { children5.add(col6Expr); modExpr.setChildren(children5); - VectorizationContext vc = new 
VectorizationContext(); + VectorizationContext vc = new VectorizationContext("name"); vc.addInitialColumn("col1"); vc.addInitialColumn("col2"); vc.addInitialColumn("col3"); @@ -297,7 +297,7 @@ public void testStringFilterExpressions() throws HiveException { columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -322,7 +322,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -341,7 +341,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -360,7 +360,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -378,7 +378,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -395,7 +395,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -412,7 +412,7 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -434,7 +434,7 @@ public void testFloatInExpressions() throws HiveException { List columns = new ArrayList(); columns.add("col1"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -480,7 +480,7 @@ public void testVectorizeFilterAndOrExpression() throws HiveException { columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -530,7 +530,7 @@ public void testVectorizeAndOrProjectionExpression() throws HiveException { List columns = new ArrayList(); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new 
VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER); assertEquals(veAnd.getClass(), FilterExprAndExpr.class); assertEquals(veAnd.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class); @@ -555,7 +555,7 @@ public void testVectorizeAndOrProjectionExpression() throws HiveException { orExprDesc.setChildren(children4); //Allocate new Vectorization context to reset the intermediate columns. - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER); assertEquals(veOr.getClass(), FilterExprOrExpr.class); assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class); @@ -596,7 +596,7 @@ public void testNotExpression() throws HiveException { columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(notExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -633,7 +633,7 @@ public void testNullExpressions() throws HiveException { List columns = new ArrayList(); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(isNullExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -674,7 +674,7 @@ public void testNotNullExpressions() throws HiveException { List columns = new ArrayList(); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(isNotNullExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -703,7 +703,7 @@ public void testVectorizeScalarColumnExpression() throws HiveException { List columns = new ArrayList(); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(scalarMinusConstant, VectorExpressionDescriptor.Mode.PROJECTION); assertEquals(ve.getClass(), LongScalarSubtractLongColumn.class); @@ -726,7 +726,7 @@ public void testFilterWithNegativeScalar() throws HiveException { columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -744,7 +744,7 @@ public void testUnaryMinusColumnLong() throws HiveException { List columns = new ArrayList(); columns.add("col0"); columns.add("col1"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(negExprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -762,7 +762,7 @@ public void testUnaryMinusColumnDouble() throws HiveException { List columns = new ArrayList(); columns.add("col0"); columns.add("col1"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", 
columns); VectorExpression ve = vc.getVectorExpression(negExprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -787,7 +787,7 @@ public void testFilterScalarCompareColumn() throws HiveException { List columns = new ArrayList(); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(scalarGreaterColExpr, VectorExpressionDescriptor.Mode.FILTER); assertEquals(FilterLongScalarGreaterLongColumn.class, ve.getClass()); } @@ -810,7 +810,7 @@ public void testFilterBooleanColumnCompareBooleanScalar() throws HiveException { List columns = new ArrayList(); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(colEqualScalar, VectorExpressionDescriptor.Mode.FILTER); assertEquals(FilterLongColEqualLongScalar.class, ve.getClass()); } @@ -833,7 +833,7 @@ public void testBooleanColumnCompareBooleanScalar() throws HiveException { List columns = new ArrayList(); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(colEqualScalar, VectorExpressionDescriptor.Mode.PROJECTION); assertEquals(LongColEqualLongScalar.class, ve.getClass()); } @@ -850,7 +850,7 @@ public void testUnaryStringExpressions() throws HiveException { List columns = new ArrayList(); columns.add("b"); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); GenericUDF stringLower = new GenericUDFLower(); stringUnary.setGenericUDF(stringLower); @@ -860,7 +860,7 @@ public void testUnaryStringExpressions() throws HiveException { assertEquals(1, ((StringLower) ve).getColNum()); assertEquals(2, ((StringLower) ve).getOutputColumn()); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ExprNodeGenericFuncDesc anotherUnary = new ExprNodeGenericFuncDesc(); anotherUnary.setTypeInfo(TypeInfoFactory.stringTypeInfo); @@ -895,7 +895,7 @@ public void testMathFunctions() throws HiveException { List columns = new ArrayList(); columns.add("b"); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); // Sin(double) GenericUDFBridge gudfBridge = new GenericUDFBridge("sin", false, UDFSin.class.getName()); @@ -986,7 +986,7 @@ public void testTimeStampUdfs() throws HiveException { List columns = new ArrayList(); columns.add("b"); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); //UDFYear GenericUDFBridge gudfBridge = new GenericUDFBridge("year", false, UDFYear.class.getName()); @@ -1024,7 +1024,7 @@ public void testBetweenFilters() throws HiveException { columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterStringColumnBetween); @@ -1050,7 +1050,7 @@ public void testBetweenFilters() throws HiveException { exprDesc = new 
ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterCharColumnBetween); @@ -1075,7 +1075,7 @@ public void testBetweenFilters() throws HiveException { exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterVarCharColumnBetween); @@ -1144,7 +1144,7 @@ public void testInFiltersAndExprs() throws HiveException { columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterStringColumnInList); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -1199,7 +1199,7 @@ public void testIfConditionalExprs() throws HiveException { columns.add("col1"); columns.add("col2"); columns.add("col3"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprLongColumnLongColumn); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index b6de046..0246cd5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -1288,9 +1288,6 @@ JobConf createMockExecutionEnvironment(Path workDir, } mapWork.setPathToAliases(aliasMap); mapWork.setPathToPartitionInfo(partMap); - mapWork.setAllColumnVectorMaps(new HashMap>()); - mapWork.setAllScratchColumnVectorTypeMaps(new HashMap>()); // write the plan out FileSystem localFs = FileSystem.getLocal(conf).getRaw(); diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index ec47c08..d12c137 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -52,7 +52,7 @@ public void setUp() { columns.add("col3"); //Generate vectorized expression - vContext = new VectorizationContext(columns); + vContext = new VectorizationContext("name", columns); } @Description(name = "fake", value = "FAKE") diff --git ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out index 9645b13..3f8a271 100644 --- ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out @@ -144,17 +144,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: decimal(38,18)) - outputColumnNames: _col0, _col1, _col2, _col3 + 
File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -170,4 +166,4 @@ POSTHOOK: query: select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY POSTHOOK: Input: default@vectortab2korc #### A masked pattern was here #### --4997414117561.546875 4994550248722.298828 -10252745435816.024410 -5399023399.587163986308583465 +-4997414117561.546875 4994550248722.298828 -10252745435816.02441 -5399023399.587163986308583465 diff --git ql/src/test/results/clientpositive/tez/vector_char_2.q.out ql/src/test/results/clientpositive/tez/vector_char_2.q.out index 59f872a..4a2b795 100644 --- ql/src/test/results/clientpositive/tez/vector_char_2.q.out +++ ql/src/test/results/clientpositive/tez/vector_char_2.q.out @@ -77,12 +77,12 @@ STAGE PLANS: alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: char(20)), key (type: char(10)) - outputColumnNames: value, key + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(UDFToInteger(key)), count() - keys: value (type: char(20)) + aggregations: sum(_col1), count() + keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE @@ -101,15 +101,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(20)) - sort order: + - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -209,12 +205,12 @@ STAGE PLANS: alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: char(20)), key (type: char(10)) - outputColumnNames: value, key + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(UDFToInteger(key)), count() - keys: value (type: char(20)) + aggregations: sum(_col1), count() + keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, 
_col1, _col2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE @@ -233,15 +229,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(20)) - sort order: - - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/tez/vector_date_1.q.out ql/src/test/results/clientpositive/tez/vector_date_1.q.out new file mode 100644 index 0000000..8d1192a --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_date_1.q.out @@ -0,0 +1,719 @@ +PREHOOK: query: drop table if exists vector_date_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vector_date_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vector_date_1 (dt1 date, dt2 date) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: create table vector_date_1 (dt1 date, dt2 date) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_date_1 +PREHOOK: query: insert into table vector_date_1 + select null, null from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: insert into table vector_date_1 + select null, null from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_date_1 +POSTHOOK: Lineage: vector_date_1.dt1 EXPRESSION [] +POSTHOOK: Lineage: vector_date_1.dt2 EXPRESSION [] +PREHOOK: query: insert into table vector_date_1 + select date '1999-12-31', date '2000-01-01' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: insert into table vector_date_1 + select date '1999-12-31', date '2000-01-01' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_date_1 +POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] +POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] +PREHOOK: query: insert into table vector_date_1 + select date '2001-01-01', date '2001-06-01' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: insert into table vector_date_1 + select date '2001-01-01', date '2001-06-01' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_date_1 +POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] +POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] +PREHOOK: query: -- column-to-column comparison in select clause +explain +select + dt1, dt2, + -- should be all true + dt1 = dt1, + dt1 != dt2, + dt1 <= dt1, + dt1 <= dt2, + dt1 < dt2, + dt2 >= dt2, + dt2 >= dt1, + dt2 > dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: -- column-to-column comparison in select clause 
+explain +select + dt1, dt2, + -- should be all true + dt1 = dt1, + dt1 != dt2, + dt1 <= dt1, + dt1 <= dt2, + dt1 < dt2, + dt2 >= dt2, + dt2 >= dt1, + dt2 > dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt1 (type: date), dt2 (type: date), (dt1 = dt1) (type: boolean), (dt1 <> dt2) (type: boolean), (dt1 <= dt1) (type: boolean), (dt1 <= dt2) (type: boolean), (dt1 < dt2) (type: boolean), (dt2 >= dt2) (type: boolean), (dt2 >= dt1) (type: boolean), (dt2 > dt1) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, dt2, + -- should be all true + dt1 = dt1, + dt1 != dt2, + dt1 <= dt1, + dt1 <= dt2, + dt1 < dt2, + dt2 >= dt2, + dt2 >= dt1, + dt2 > dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, dt2, + -- should be all true + dt1 = dt1, + dt1 != dt2, + dt1 <= dt1, + dt1 <= dt2, + dt1 < dt2, + dt2 >= dt2, + dt2 >= dt1, + dt2 > dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1999-12-31 2000-01-01 true true true true true true true true +2001-01-01 2001-06-01 true true true true true true true true +PREHOOK: query: explain +select + dt1, dt2, + -- should be all false + dt1 != dt1, + dt1 = dt2, + dt1 < dt1, + dt1 >= dt2, + dt1 > dt2, + dt2 > dt2, + dt2 <= dt1, + dt2 < dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + dt1, dt2, + -- should be all false + dt1 != dt1, + dt1 = dt2, + dt1 < 
dt1, + dt1 >= dt2, + dt1 > dt2, + dt2 > dt2, + dt2 <= dt1, + dt2 < dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt1 (type: date), dt2 (type: date), (dt1 <> dt1) (type: boolean), (dt1 = dt2) (type: boolean), (dt1 < dt1) (type: boolean), (dt1 >= dt2) (type: boolean), (dt1 > dt2) (type: boolean), (dt2 > dt2) (type: boolean), (dt2 <= dt1) (type: boolean), (dt2 < dt1) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, dt2, + -- should be all false + dt1 != dt1, + dt1 = dt2, + dt1 < dt1, + dt1 >= dt2, + dt1 > dt2, + dt2 > dt2, + dt2 <= dt1, + dt2 < dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, dt2, + -- should be all false + dt1 != dt1, + dt1 = dt2, + dt1 < dt1, + dt1 >= dt2, + dt1 > dt2, + dt2 > dt2, + dt2 <= dt1, + dt2 < dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1999-12-31 2000-01-01 false false false false false false false false +2001-01-01 2001-06-01 false false false false false false false false +PREHOOK: query: -- column-to-literal/literal-to-column comparison in select clause +explain +select + dt1, + -- should be all true + dt1 != date '1970-01-01', + dt1 >= date '1970-01-01', + dt1 > date '1970-01-01', + dt1 <= date '2100-01-01', + dt1 < date '2100-01-01', + date '1970-01-01' != dt1, + date '1970-01-01' <= dt1, + date '1970-01-01' < dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +POSTHOOK: 
query: -- column-to-literal/literal-to-column comparison in select clause +explain +select + dt1, + -- should be all true + dt1 != date '1970-01-01', + dt1 >= date '1970-01-01', + dt1 > date '1970-01-01', + dt1 <= date '2100-01-01', + dt1 < date '2100-01-01', + date '1970-01-01' != dt1, + date '1970-01-01' <= dt1, + date '1970-01-01' < dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt1 (type: date), (dt1 <> 1970-01-01) (type: boolean), (dt1 >= 1970-01-01) (type: boolean), (dt1 > 1970-01-01) (type: boolean), (dt1 <= 2100-01-01) (type: boolean), (dt1 < 2100-01-01) (type: boolean), (1970-01-01 <> dt1) (type: boolean), (1970-01-01 <= dt1) (type: boolean), (1970-01-01 < dt1) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, + -- should be all true + dt1 != date '1970-01-01', + dt1 >= date '1970-01-01', + dt1 > date '1970-01-01', + dt1 <= date '2100-01-01', + dt1 < date '2100-01-01', + date '1970-01-01' != dt1, + date '1970-01-01' <= dt1, + date '1970-01-01' < dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, + -- should be all true + dt1 != date '1970-01-01', + dt1 >= date '1970-01-01', + dt1 > date '1970-01-01', + dt1 <= date '2100-01-01', + dt1 < date '2100-01-01', + date '1970-01-01' != dt1, + date '1970-01-01' <= dt1, + date '1970-01-01' < dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL +1999-12-31 true true true true true true true true +2001-01-01 true true true true true true true 
true +PREHOOK: query: explain +select + dt1, + -- should all be false + dt1 = date '1970-01-01', + dt1 <= date '1970-01-01', + dt1 < date '1970-01-01', + dt1 >= date '2100-01-01', + dt1 > date '2100-01-01', + date '1970-01-01' = dt1, + date '1970-01-01' >= dt1, + date '1970-01-01' > dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + dt1, + -- should all be false + dt1 = date '1970-01-01', + dt1 <= date '1970-01-01', + dt1 < date '1970-01-01', + dt1 >= date '2100-01-01', + dt1 > date '2100-01-01', + date '1970-01-01' = dt1, + date '1970-01-01' >= dt1, + date '1970-01-01' > dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt1 (type: date), (dt1 = 1970-01-01) (type: boolean), (dt1 <= 1970-01-01) (type: boolean), (dt1 < 1970-01-01) (type: boolean), (dt1 >= 2100-01-01) (type: boolean), (dt1 > 2100-01-01) (type: boolean), (1970-01-01 = dt1) (type: boolean), (1970-01-01 >= dt1) (type: boolean), (1970-01-01 > dt1) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, + -- should all be false + dt1 = date '1970-01-01', + dt1 <= date '1970-01-01', + dt1 < date '1970-01-01', + dt1 >= date '2100-01-01', + dt1 > date '2100-01-01', + date '1970-01-01' = dt1, + date '1970-01-01' >= dt1, + date '1970-01-01' > dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, + -- should all be false + dt1 = date '1970-01-01', + dt1 <= date '1970-01-01', + dt1 < date '1970-01-01', + dt1 >= date '2100-01-01', + dt1 > date '2100-01-01', + date '1970-01-01' = dt1, + date '1970-01-01' >= dt1, + date '1970-01-01' > 
dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL +1999-12-31 false false false false false false false false +2001-01-01 false false false false false false false false +PREHOOK: query: -- column-to-column comparisons in predicate +-- all rows with non-null dt1 should be returned +explain +select + dt1, dt2 +from vector_date_1 +where + dt1 = dt1 + and dt1 != dt2 + and dt1 < dt2 + and dt1 <= dt2 + and dt2 > dt1 + and dt2 >= dt1 +order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: -- column-to-column comparisons in predicate +-- all rows with non-null dt1 should be returned +explain +select + dt1, dt2 +from vector_date_1 +where + dt1 = dt1 + and dt1 != dt2 + and dt1 < dt2 + and dt1 <= dt2 + and dt2 > dt1 + and dt2 >= dt1 +order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((dt1 = dt1) and (dt1 <> dt2)) and (dt1 < dt2)) and (dt1 <= dt2)) and (dt2 > dt1)) and (dt2 >= dt1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: dt1 (type: date), dt2 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, dt2 +from vector_date_1 +where + dt1 = dt1 + and dt1 != dt2 + and dt1 < dt2 + and dt1 <= dt2 + and dt2 > dt1 + and dt2 >= dt1 +order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, dt2 +from vector_date_1 +where + dt1 = dt1 + and dt1 != dt2 + and dt1 < dt2 + and dt1 <= dt2 + and dt2 > dt1 + and dt2 >= dt1 +order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +1999-12-31 2000-01-01 +2001-01-01 2001-06-01 +PREHOOK: query: -- column-to-literal/literal-to-column comparison in predicate +-- only a single row should be returned +explain +select + dt1, dt2 +from vector_date_1 +where + dt1 = date '2001-01-01' + and date '2001-01-01' = dt1 + and dt1 != date '1970-01-01' + and date '1970-01-01' != dt1 + and dt1 > date '1970-01-01' + and dt1 >= date '1970-01-01' + and date '1970-01-01' < dt1 + and date '1970-01-01' 
<= dt1 +order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: -- column-to-literal/literal-to-column comparison in predicate +-- only a single row should be returned +explain +select + dt1, dt2 +from vector_date_1 +where + dt1 = date '2001-01-01' + and date '2001-01-01' = dt1 + and dt1 != date '1970-01-01' + and date '1970-01-01' != dt1 + and dt1 > date '1970-01-01' + and dt1 >= date '1970-01-01' + and date '1970-01-01' < dt1 + and date '1970-01-01' <= dt1 +order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((dt1 = 2001-01-01) and (2001-01-01 = dt1)) and (dt1 <> 1970-01-01)) and (1970-01-01 <> dt1)) and (dt1 > 1970-01-01)) and (dt1 >= 1970-01-01)) and (1970-01-01 < dt1)) and (1970-01-01 <= dt1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: dt2 (type: date) + outputColumnNames: _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 2001-01-01 (type: date) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: 2001-01-01 (type: date), VALUE._col0 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, dt2 +from vector_date_1 +where + dt1 = date '2001-01-01' + and date '2001-01-01' = dt1 + and dt1 != date '1970-01-01' + and date '1970-01-01' != dt1 + and dt1 > date '1970-01-01' + and dt1 >= date '1970-01-01' + and date '1970-01-01' < dt1 + and date '1970-01-01' <= dt1 +order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, dt2 +from vector_date_1 +where + dt1 = date '2001-01-01' + and date '2001-01-01' = dt1 + and dt1 != date '1970-01-01' + and date '1970-01-01' != dt1 + and dt1 > date '1970-01-01' + and dt1 >= date '1970-01-01' + and date '1970-01-01' < dt1 + and date '1970-01-01' <= dt1 +order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +2001-01-01 2001-06-01 +PREHOOK: query: drop table vector_date_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@vector_date_1 +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: drop table vector_date_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@vector_date_1 +POSTHOOK: Output: default@vector_date_1 diff --git ql/src/test/results/clientpositive/tez/vector_decimal_round.q.out ql/src/test/results/clientpositive/tez/vector_decimal_round.q.out index 7ea7552..9ea95b9 100644 --- 
ql/src/test/results/clientpositive/tez/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_round.q.out @@ -114,18 +114,18 @@ STAGE PLANS: alias: decimal_tbl_txt Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -264,18 +264,18 @@ STAGE PLANS: alias: decimal_tbl_rc Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -415,19 +415,19 @@ STAGE PLANS: alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out index 0d00c2d..103fd8a 100644 --- 
ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out @@ -121,7 +121,7 @@ FROM decimal_tbl_1_orc ORDER BY d POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_1_orc #### A masked pattern was here #### -55555 55555 55555.0 55555.00 55555.000 55560 55600 56000 60000 100000 0 0 0 +55555 55555 55555 55555 55555 55560 55600 56000 60000 100000 0 0 0 PREHOOK: query: create table decimal_tbl_2_orc (pos decimal(38,18), neg decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE @@ -240,7 +240,7 @@ FROM decimal_tbl_2_orc ORDER BY p POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_2_orc #### A masked pattern was here #### -125 125 125.3 125.32 125.315 125.3150 130 100 0 0 -125 -125 -125.3 -125.32 -125.315 -125.3150 -130 -100 0 0 +125 125 125.3 125.32 125.315 125.315 130 100 0 0 -125 -125 -125.3 -125.32 -125.315 -125.315 -130 -100 0 0 PREHOOK: query: create table decimal_tbl_3_orc (dec decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE @@ -402,7 +402,7 @@ FROM decimal_tbl_3_orc ORDER BY d POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_3_orc #### A masked pattern was here #### -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3.1 3.14 3.142 3.1416 3.14159 3.141593 3.1415927 3.14159265 3.141592654 3.1415926536 3.14159265359 3.141592653590 3.1415926535898 3.1415926535898 3.14159265358979 3.141592653589793 3.1415926535897930 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3.1 3.14 3.142 3.1416 3.14159 3.141593 3.1415927 3.14159265 3.141592654 3.1415926536 3.14159265359 3.14159265359 3.1415926535898 3.1415926535898 3.14159265358979 3.141592653589793 3.141592653589793 PREHOOK: query: create table decimal_tbl_4_orc (pos decimal(38,18), neg decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE @@ -457,7 +457,7 @@ STAGE PLANS: alias: decimal_tbl_4_orc Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9)), round(1809242.3151111344, 9) (type: decimal(17,9)), round((- 1809242.3151111344), 9) (type: decimal(17,9)) + expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -465,6 +465,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(30,9)), _col2 (type: decimal(17,9)), _col3 (type: decimal(17,9)) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/tez/vector_if_expr.q.out ql/src/test/results/clientpositive/tez/vector_if_expr.q.out index ca7134e..d1ed01c 100644 --- ql/src/test/results/clientpositive/tez/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/tez/vector_if_expr.q.out @@ -19,18 +19,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cboolean1 is not null and cboolean1) (type: boolean) - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic 
stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 @@ -38,10 +38,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/tez/vector_interval_1.q.out ql/src/test/results/clientpositive/tez/vector_interval_1.q.out new file mode 100644 index 0000000..1cc7cb6 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_interval_1.q.out @@ -0,0 +1,822 @@ +PREHOOK: query: drop table if exists vector_interval_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vector_interval_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_interval_1 +POSTHOOK: query: create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_interval_1 +PREHOOK: query: insert into vector_interval_1 + select timestamp '2001-01-01 01:02:03', date '2001-01-01', '1-2', '1 2:3:4' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_interval_1 +POSTHOOK: query: insert into vector_interval_1 + select timestamp '2001-01-01 01:02:03', date '2001-01-01', '1-2', '1 2:3:4' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_interval_1 +POSTHOOK: Lineage: vector_interval_1.dt SIMPLE [] +POSTHOOK: Lineage: vector_interval_1.str1 SIMPLE [] +POSTHOOK: Lineage: vector_interval_1.str2 SIMPLE [] +POSTHOOK: Lineage: vector_interval_1.ts SIMPLE [] +PREHOOK: query: insert into vector_interval_1 + select null, null, null, null from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_interval_1 +POSTHOOK: query: insert into vector_interval_1 + select null, null, null, null from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_interval_1 +POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION [] +POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] +PREHOOK: 
query: -- constants/cast from string +explain +select + str1, + interval '1-2' year to month, interval_year_month(str1), + interval '1 2:3:4' day to second, interval_day_time(str2) +from vector_interval_1 order by str1 +PREHOOK: type: QUERY +POSTHOOK: query: -- constants/cast from string +explain +select + str1, + interval '1-2' year to month, interval_year_month(str1), + interval '1 2:3:4' day to second, interval_day_time(str2) +from vector_interval_1 order by str1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str1 (type: string), 1-2 (type: interval_year_month), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_year_month), _col2 (type: interval_year_month), _col3 (type: interval_day_time), _col4 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str1, + interval '1-2' year to month, interval_year_month(str1), + interval '1 2:3:4' day to second, interval_day_time(str2) +from vector_interval_1 order by str1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + str1, + interval '1-2' year to month, interval_year_month(str1), + interval '1 2:3:4' day to second, interval_day_time(str2) +from vector_interval_1 order by str1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL 1-2 NULL 1 02:03:04.000000000 NULL +1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000 +PREHOOK: query: -- interval arithmetic +explain +select + dt, + interval '1-2' year to month + interval '1-2' year to month, + interval_year_month(str1) + interval_year_month(str1), + interval '1-2' year to month + interval_year_month(str1), + interval '1-2' year to month - interval '1-2' year to month, + interval_year_month(str1) - interval_year_month(str1), + interval '1-2' year to month - interval_year_month(str1) +from 
vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: -- interval arithmetic +explain +select + dt, + interval '1-2' year to month + interval '1-2' year to month, + interval_year_month(str1) + interval_year_month(str1), + interval '1-2' year to month + interval_year_month(str1), + interval '1-2' year to month - interval '1-2' year to month, + interval_year_month(str1) - interval_year_month(str1), + interval '1-2' year to month - interval_year_month(str1) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), 2-4 (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), 0-0 (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_year_month), _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col4 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month), VALUE._col4 (type: interval_year_month), VALUE._col5 (type: interval_year_month) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + interval '1-2' year to month + interval '1-2' year to month, + interval_year_month(str1) + interval_year_month(str1), + interval '1-2' year to month + interval_year_month(str1), + interval '1-2' year to month - interval '1-2' year to month, + interval_year_month(str1) - interval_year_month(str1), + interval '1-2' year to month - interval_year_month(str1) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + interval '1-2' year to month + interval '1-2' year to month, + interval_year_month(str1) + 
interval_year_month(str1), + interval '1-2' year to month + interval_year_month(str1), + interval '1-2' year to month - interval '1-2' year to month, + interval_year_month(str1) - interval_year_month(str1), + interval '1-2' year to month - interval_year_month(str1) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL 2-4 NULL NULL 0-0 NULL NULL +2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 +PREHOOK: query: explain +select + dt, + interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, + interval_day_time(str2) + interval_day_time(str2), + interval '1 2:3:4' day to second + interval_day_time(str2), + interval '1 2:3:4' day to second - interval '1 2:3:4' day to second, + interval_day_time(str2) - interval_day_time(str2), + interval '1 2:3:4' day to second - interval_day_time(str2) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + dt, + interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, + interval_day_time(str2) + interval_day_time(str2), + interval '1 2:3:4' day to second + interval_day_time(str2), + interval '1 2:3:4' day to second - interval '1 2:3:4' day to second, + interval_day_time(str2) - interval_day_time(str2), + interval '1 2:3:4' day to second - interval_day_time(str2) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), 2 04:06:08.000000000 (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat 
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, + interval_day_time(str2) + interval_day_time(str2), + interval '1 2:3:4' day to second + interval_day_time(str2), + interval '1 2:3:4' day to second - interval '1 2:3:4' day to second, + interval_day_time(str2) - interval_day_time(str2), + interval '1 2:3:4' day to second - interval_day_time(str2) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, + interval_day_time(str2) + interval_day_time(str2), + interval '1 2:3:4' day to second + interval_day_time(str2), + interval '1 2:3:4' day to second - interval '1 2:3:4' day to second, + interval_day_time(str2) - interval_day_time(str2), + interval '1 2:3:4' day to second - interval_day_time(str2) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL +2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 +PREHOOK: query: -- date-interval arithmetic +explain +select + dt, + dt + interval '1-2' year to month, + dt + interval_year_month(str1), + interval '1-2' year to month + dt, + interval_year_month(str1) + dt, + dt - interval '1-2' year to month, + dt - interval_year_month(str1), + dt + interval '1 2:3:4' day to second, + dt + interval_day_time(str2), + interval '1 2:3:4' day to second + dt, + interval_day_time(str2) + dt, + dt - interval '1 2:3:4' day to second, + dt - interval_day_time(str2) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: -- date-interval arithmetic +explain +select + dt, + dt + interval '1-2' year to month, + dt + interval_year_month(str1), + interval '1-2' year to month + dt, + interval_year_month(str1) + dt, + dt - interval '1-2' year to month, + dt - interval_year_month(str1), + dt + interval '1 2:3:4' day to second, + dt + interval_day_time(str2), + interval '1 2:3:4' day to second + dt, + interval_day_time(str2) + dt, + dt - interval '1 2:3:4' day to second, + dt - interval_day_time(str2) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 
02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + dt + interval '1-2' year to month, + dt + interval_year_month(str1), + interval '1-2' year to month + dt, + interval_year_month(str1) + dt, + dt - interval '1-2' year to month, + dt - interval_year_month(str1), + dt + interval '1 2:3:4' day to second, + dt + interval_day_time(str2), + interval '1 2:3:4' day to second + dt, + interval_day_time(str2) + dt, + dt - interval '1 2:3:4' day to second, + dt - interval_day_time(str2) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + dt + interval '1-2' year to month, + dt + interval_year_month(str1), + interval '1-2' year to month + dt, + interval_year_month(str1) + dt, + dt - interval '1-2' year to month, + dt - interval_year_month(str1), + dt + interval '1 2:3:4' day to second, + dt + interval_day_time(str2), + interval '1 2:3:4' day to second + dt, + interval_day_time(str2) + dt, + dt - interval '1 2:3:4' day to second, + dt - interval_day_time(str2) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56 +PREHOOK: query: -- timestamp-interval arithmetic +explain +select + ts, + ts + interval '1-2' year to month, + ts + interval_year_month(str1), + interval '1-2' year to month + ts, + interval_year_month(str1) + ts, + ts - interval 
'1-2' year to month, + ts - interval_year_month(str1), + ts + interval '1 2:3:4' day to second, + ts + interval_day_time(str2), + interval '1 2:3:4' day to second + ts, + interval_day_time(str2) + ts, + ts - interval '1 2:3:4' day to second, + ts - interval_day_time(str2) +from vector_interval_1 order by ts +PREHOOK: type: QUERY +POSTHOOK: query: -- timestamp-interval arithmetic +explain +select + ts, + ts + interval '1-2' year to month, + ts + interval_year_month(str1), + interval '1-2' year to month + ts, + interval_year_month(str1) + ts, + ts - interval '1-2' year to month, + ts - interval_year_month(str1), + ts + interval '1 2:3:4' day to second, + ts + interval_day_time(str2), + interval '1 2:3:4' day to second + ts, + interval_day_time(str2) + ts, + ts - interval '1 2:3:4' day to second, + ts - interval_day_time(str2) +from vector_interval_1 order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + ts, + ts + interval '1-2' year to month, + ts + interval_year_month(str1), + interval '1-2' year to month + ts, + interval_year_month(str1) + ts, + ts - interval '1-2' year to month, + ts - interval_year_month(str1), + ts + interval '1 2:3:4' day to second, + ts + interval_day_time(str2), + interval '1 2:3:4' day to second + ts, + interval_day_time(str2) + ts, + ts - interval '1 2:3:4' day to second, + ts - interval_day_time(str2) +from vector_interval_1 order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + ts, + ts + interval '1-2' year to month, + ts + interval_year_month(str1), + interval '1-2' year to month + ts, + interval_year_month(str1) + ts, + ts - interval '1-2' year to month, + ts - interval_year_month(str1), + ts + interval '1 2:3:4' day to second, + ts + interval_day_time(str2), + interval '1 2:3:4' day to second + ts, + interval_day_time(str2) + ts, + ts - interval '1 2:3:4' day to second, + ts - interval_day_time(str2) +from vector_interval_1 order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59 +PREHOOK: query: -- timestamp-timestamp arithmetic +explain +select + ts, + ts - ts, + timestamp '2001-01-01 01:02:03' - ts, + ts - timestamp '2001-01-01 01:02:03' +from vector_interval_1 order by ts +PREHOOK: type: QUERY +POSTHOOK: query: -- timestamp-timestamp arithmetic +explain +select + ts, + ts - ts, + timestamp '2001-01-01 01:02:03' - ts, + ts - timestamp '2001-01-01 01:02:03' +from vector_interval_1 order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + ts, + ts - ts, + timestamp '2001-01-01 01:02:03' - ts, + ts - timestamp '2001-01-01 01:02:03' +from vector_interval_1 order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + ts, + ts - ts, + timestamp '2001-01-01 01:02:03' - ts, + ts - timestamp '2001-01-01 01:02:03' +from vector_interval_1 order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL +2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 +PREHOOK: query: -- date-date arithmetic +explain +select + dt, + dt - dt, + date '2001-01-01' - dt, + dt - date '2001-01-01' +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: -- date-date arithmetic +explain +select + dt, + dt - dt, + date '2001-01-01' - dt, + dt - date '2001-01-01' +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + dt - dt, + date '2001-01-01' - dt, + dt - date '2001-01-01' +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + dt - dt, + date '2001-01-01' - dt, + dt - date '2001-01-01' +from vector_interval_1 
order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL +2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 +PREHOOK: query: -- date-timestamp arithmetic +explain +select + dt, + ts - dt, + timestamp '2001-01-01 01:02:03' - dt, + ts - date '2001-01-01', + dt - ts, + dt - timestamp '2001-01-01 01:02:03', + date '2001-01-01' - ts +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: -- date-timestamp arithmetic +explain +select + dt, + ts - dt, + timestamp '2001-01-01 01:02:03' - dt, + ts - date '2001-01-01', + dt - ts, + dt - timestamp '2001-01-01 01:02:03', + date '2001-01-01' - ts +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + ts - dt, + timestamp '2001-01-01 01:02:03' - dt, + ts - date '2001-01-01', + dt - ts, + dt - timestamp '2001-01-01 01:02:03', + date '2001-01-01' - ts +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + ts - dt, + timestamp '2001-01-01 01:02:03' - dt, + ts - date '2001-01-01', + dt - ts, + dt - timestamp '2001-01-01 01:02:03', + date '2001-01-01' - ts +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 
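
The date-timestamp result rows reproduced just below pin down the day-to-second semantics these golden files check: with ts = timestamp '2001-01-01 01:02:03' and dt = date '2001-01-01', ts - dt prints as 0 01:02:03.000000000 and dt - ts as its negation. As a point of comparison only, here is a minimal standalone sketch using plain java.time rather than Hive's vectorized interval classes; the class name IntervalDayTimeSketch and its format helper are illustrative assumptions, not part of this patch. It prints the same three values in the "d hh:mm:ss.nnnnnnnnn" rendering used by the result rows.

import java.time.Duration;
import java.time.LocalDate;
import java.time.LocalDateTime;

// Illustrative only: mirrors the expected interval_day_time values in the
// q.out results below; this is NOT Hive's implementation.
public class IntervalDayTimeSketch {

    // Render a Duration the way the result rows print interval_day_time:
    // optional sign, days, then hh:mm:ss.nnnnnnnnn.
    static String format(Duration d) {
        boolean neg = d.isNegative();
        Duration abs = neg ? d.negated() : d;
        return String.format("%s%d %02d:%02d:%02d.%09d",
                neg ? "-" : "",
                abs.toDays(), abs.toHours() % 24, abs.toMinutes() % 60,
                abs.getSeconds() % 60, abs.getNano());
    }

    public static void main(String[] args) {
        LocalDateTime ts = LocalDateTime.of(2001, 1, 1, 1, 2, 3); // timestamp '2001-01-01 01:02:03'
        LocalDate dt = LocalDate.of(2001, 1, 1);                  // date '2001-01-01'

        // ts - dt  ->  0 01:02:03.000000000
        System.out.println(format(Duration.between(dt.atStartOfDay(), ts)));
        // dt - ts  -> -0 01:02:03.000000000
        System.out.println(format(Duration.between(ts, dt.atStartOfDay())));
        // dt - dt  ->  0 00:00:00.000000000
        System.out.println(format(Duration.between(dt.atStartOfDay(), dt.atStartOfDay())));
    }
}
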
+#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL +2001-01-01 0 01:02:03.000000000 0 01:02:03.000000000 0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 diff --git ql/src/test/results/clientpositive/tez/vector_interval_2.q.out ql/src/test/results/clientpositive/tez/vector_interval_2.q.out new file mode 100644 index 0000000..0f32cee --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_interval_2.q.out @@ -0,0 +1,1620 @@ +PREHOOK: query: drop table if exists vector_interval_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vector_interval_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 string, str3 string, str4 string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_interval_2 +POSTHOOK: query: create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 string, str3 string, str4 string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_interval_2 +PREHOOK: query: insert into vector_interval_2 + select timestamp '2001-01-01 01:02:03', date '2001-01-01', '1-2', '1-3', '1 2:3:4', '1 2:3:5' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_interval_2 +POSTHOOK: query: insert into vector_interval_2 + select timestamp '2001-01-01 01:02:03', date '2001-01-01', '1-2', '1-3', '1 2:3:4', '1 2:3:5' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_interval_2 +POSTHOOK: Lineage: vector_interval_2.dt SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.str1 SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.str2 SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.str3 SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.str4 SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.ts SIMPLE [] +PREHOOK: query: insert into vector_interval_2 + select null, null, null, null, null, null from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_interval_2 +POSTHOOK: query: insert into vector_interval_2 + select null, null, null, null, null, null from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_interval_2 +POSTHOOK: Lineage: vector_interval_2.dt EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.str1 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.str2 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.str3 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION [] +PREHOOK: query: -- interval comparisons in select clause + +explain +select + str1, + -- Should all be true + interval_year_month(str1) = interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str2), + interval_year_month(str1) < interval_year_month(str2), + interval_year_month(str1) >= interval_year_month(str1), + interval_year_month(str2) >= interval_year_month(str1), + interval_year_month(str2) > interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str2), + + interval_year_month(str1) = interval '1-2' year to month, + interval_year_month(str1) <= interval '1-2' year to month, + interval_year_month(str1) <= interval '1-3' year to month, + interval_year_month(str1) < interval '1-3' year to month, + 
interval_year_month(str1) >= interval '1-2' year to month, + interval_year_month(str2) >= interval '1-2' year to month, + interval_year_month(str2) > interval '1-2' year to month, + interval_year_month(str1) != interval '1-3' year to month, + + interval '1-2' year to month = interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str2), + interval '1-2' year to month < interval_year_month(str2), + interval '1-2' year to month >= interval_year_month(str1), + interval '1-3' year to month >= interval_year_month(str1), + interval '1-3' year to month > interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str2) +from vector_interval_2 order by str1 +PREHOOK: type: QUERY +POSTHOOK: query: -- interval comparisons in select clause + +explain +select + str1, + -- Should all be true + interval_year_month(str1) = interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str2), + interval_year_month(str1) < interval_year_month(str2), + interval_year_month(str1) >= interval_year_month(str1), + interval_year_month(str2) >= interval_year_month(str1), + interval_year_month(str2) > interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str2), + + interval_year_month(str1) = interval '1-2' year to month, + interval_year_month(str1) <= interval '1-2' year to month, + interval_year_month(str1) <= interval '1-3' year to month, + interval_year_month(str1) < interval '1-3' year to month, + interval_year_month(str1) >= interval '1-2' year to month, + interval_year_month(str2) >= interval '1-2' year to month, + interval_year_month(str2) > interval '1-2' year to month, + interval_year_month(str1) != interval '1-3' year to month, + + interval '1-2' year to month = interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str2), + interval '1-2' year to month < interval_year_month(str2), + interval '1-2' year to month >= interval_year_month(str1), + interval '1-3' year to month >= interval_year_month(str1), + interval '1-3' year to month > interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str2) +from vector_interval_2 order by str1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS 
INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str1, + -- Should all be true + interval_year_month(str1) = interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str2), + interval_year_month(str1) < interval_year_month(str2), + interval_year_month(str1) >= interval_year_month(str1), + interval_year_month(str2) >= interval_year_month(str1), + interval_year_month(str2) > interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str2), + + interval_year_month(str1) = interval '1-2' year to month, + interval_year_month(str1) <= interval '1-2' year to month, + interval_year_month(str1) <= interval '1-3' year to month, + interval_year_month(str1) < interval '1-3' year to month, + interval_year_month(str1) >= interval '1-2' year to month, + interval_year_month(str2) >= interval '1-2' year to month, + interval_year_month(str2) > interval '1-2' year to month, + interval_year_month(str1) != interval '1-3' year to month, + + interval '1-2' year to month = interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str2), + interval '1-2' year to month < interval_year_month(str2), + interval '1-2' year to month >= interval_year_month(str1), + interval '1-3' year to month >= interval_year_month(str1), + interval '1-3' year to month > interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str2) +from vector_interval_2 order by str1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select + str1, + -- Should all be true + interval_year_month(str1) = interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str2), + interval_year_month(str1) < interval_year_month(str2), + interval_year_month(str1) >= interval_year_month(str1), + interval_year_month(str2) >= interval_year_month(str1), + interval_year_month(str2) > interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str2), + + interval_year_month(str1) = interval '1-2' year to month, + interval_year_month(str1) <= interval '1-2' year to month, + interval_year_month(str1) <= interval '1-3' year to month, + interval_year_month(str1) < interval '1-3' year to month, + interval_year_month(str1) >= interval '1-2' year to month, + interval_year_month(str2) >= interval '1-2' year to month, + interval_year_month(str2) > interval '1-2' year to month, + interval_year_month(str1) != interval '1-3' year to month, + + interval '1-2' year to month = interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str2), + interval '1-2' year to month < interval_year_month(str2), + interval '1-2' year to month >= interval_year_month(str1), + interval '1-3' year to month >= interval_year_month(str1), + interval '1-3' year to month > interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str2) +from vector_interval_2 order by str1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1-2 
true true true true true true true true true true true true true true true true true true true true true true true true +PREHOOK: query: explain +select + str1, + -- Should all be false + interval_year_month(str1) != interval_year_month(str1), + interval_year_month(str1) >= interval_year_month(str2), + interval_year_month(str1) > interval_year_month(str2), + interval_year_month(str2) <= interval_year_month(str1), + interval_year_month(str2) < interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str1), + + interval_year_month(str1) != interval '1-2' year to month, + interval_year_month(str1) >= interval '1-3' year to month, + interval_year_month(str1) > interval '1-3' year to month, + interval_year_month(str2) <= interval '1-2' year to month, + interval_year_month(str2) < interval '1-2' year to month, + interval_year_month(str1) != interval '1-2' year to month, + + interval '1-2' year to month != interval_year_month(str1), + interval '1-2' year to month >= interval_year_month(str2), + interval '1-2' year to month > interval_year_month(str2), + interval '1-3' year to month <= interval_year_month(str1), + interval '1-3' year to month < interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str1) +from vector_interval_2 order by str1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + str1, + -- Should all be false + interval_year_month(str1) != interval_year_month(str1), + interval_year_month(str1) >= interval_year_month(str2), + interval_year_month(str1) > interval_year_month(str2), + interval_year_month(str2) <= interval_year_month(str1), + interval_year_month(str2) < interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str1), + + interval_year_month(str1) != interval '1-2' year to month, + interval_year_month(str1) >= interval '1-3' year to month, + interval_year_month(str1) > interval '1-3' year to month, + interval_year_month(str2) <= interval '1-2' year to month, + interval_year_month(str2) < interval '1-2' year to month, + interval_year_month(str1) != interval '1-2' year to month, + + interval '1-2' year to month != interval_year_month(str1), + interval '1-2' year to month >= interval_year_month(str2), + interval '1-2' year to month > interval_year_month(str2), + interval '1-3' year to month <= interval_year_month(str1), + interval '1-3' year to month < interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str1) +from vector_interval_2 order by str1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < 1-2) (type: boolean), (1-2 <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str2 AS 
INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > 1-3) (type: boolean) + outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str1, + -- Should all be false + interval_year_month(str1) != interval_year_month(str1), + interval_year_month(str1) >= interval_year_month(str2), + interval_year_month(str1) > interval_year_month(str2), + interval_year_month(str2) <= interval_year_month(str1), + interval_year_month(str2) < interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str1), + + interval_year_month(str1) != interval '1-2' year to month, + interval_year_month(str1) >= interval '1-3' year to month, + interval_year_month(str1) > interval '1-3' year to month, + interval_year_month(str2) <= interval '1-2' year to month, + interval_year_month(str2) < interval '1-2' year to month, + interval_year_month(str1) != interval '1-2' year to month, + + interval '1-2' year to month != interval_year_month(str1), + interval '1-2' year to month >= interval_year_month(str2), + interval '1-2' year to month > 
interval_year_month(str2), + interval '1-3' year to month <= interval_year_month(str1), + interval '1-3' year to month < interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str1) +from vector_interval_2 order by str1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select + str1, + -- Should all be false + interval_year_month(str1) != interval_year_month(str1), + interval_year_month(str1) >= interval_year_month(str2), + interval_year_month(str1) > interval_year_month(str2), + interval_year_month(str2) <= interval_year_month(str1), + interval_year_month(str2) < interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str1), + + interval_year_month(str1) != interval '1-2' year to month, + interval_year_month(str1) >= interval '1-3' year to month, + interval_year_month(str1) > interval '1-3' year to month, + interval_year_month(str2) <= interval '1-2' year to month, + interval_year_month(str2) < interval '1-2' year to month, + interval_year_month(str1) != interval '1-2' year to month, + + interval '1-2' year to month != interval_year_month(str1), + interval '1-2' year to month >= interval_year_month(str2), + interval '1-2' year to month > interval_year_month(str2), + interval '1-3' year to month <= interval_year_month(str1), + interval '1-3' year to month < interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str1) +from vector_interval_2 order by str1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1-2 false false false false false false false false false false false false false false false false false false +PREHOOK: query: explain +select + str3, + -- Should all be true + interval_day_time(str3) = interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str4), + interval_day_time(str3) < interval_day_time(str4), + interval_day_time(str3) >= interval_day_time(str3), + interval_day_time(str4) >= interval_day_time(str3), + interval_day_time(str4) > interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str4), + + interval_day_time(str3) = interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:5' day to second, + interval_day_time(str3) < interval '1 2:3:5' day to second, + interval_day_time(str3) >= interval '1 2:3:4' day to second, + interval_day_time(str4) >= interval '1 2:3:4' day to second, + interval_day_time(str4) > interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:5' day to second, + + interval '1 2:3:4' day to second = interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str4), + interval '1 2:3:4' day to second < interval_day_time(str4), + interval '1 2:3:4' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second > interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str4) +from vector_interval_2 order by str3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + str3, + -- Should all be true + interval_day_time(str3) = interval_day_time(str3), + interval_day_time(str3) <= 
interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str4), + interval_day_time(str3) < interval_day_time(str4), + interval_day_time(str3) >= interval_day_time(str3), + interval_day_time(str4) >= interval_day_time(str3), + interval_day_time(str4) > interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str4), + + interval_day_time(str3) = interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:5' day to second, + interval_day_time(str3) < interval '1 2:3:5' day to second, + interval_day_time(str3) >= interval '1 2:3:4' day to second, + interval_day_time(str4) >= interval '1 2:3:4' day to second, + interval_day_time(str4) > interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:5' day to second, + + interval '1 2:3:4' day to second = interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str4), + interval '1 2:3:4' day to second < interval_day_time(str4), + interval '1 2:3:4' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second > interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str4) +from vector_interval_2 order by str3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) (type: boolean), (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO 
SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str3, + -- Should all be true + interval_day_time(str3) = interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str4), + interval_day_time(str3) < interval_day_time(str4), + interval_day_time(str3) >= interval_day_time(str3), + interval_day_time(str4) >= interval_day_time(str3), + interval_day_time(str4) > interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str4), + + interval_day_time(str3) = interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 
2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:5' day to second, + interval_day_time(str3) < interval '1 2:3:5' day to second, + interval_day_time(str3) >= interval '1 2:3:4' day to second, + interval_day_time(str4) >= interval '1 2:3:4' day to second, + interval_day_time(str4) > interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:5' day to second, + + interval '1 2:3:4' day to second = interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str4), + interval '1 2:3:4' day to second < interval_day_time(str4), + interval '1 2:3:4' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second > interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str4) +from vector_interval_2 order by str3 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select + str3, + -- Should all be true + interval_day_time(str3) = interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str4), + interval_day_time(str3) < interval_day_time(str4), + interval_day_time(str3) >= interval_day_time(str3), + interval_day_time(str4) >= interval_day_time(str3), + interval_day_time(str4) > interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str4), + + interval_day_time(str3) = interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:5' day to second, + interval_day_time(str3) < interval '1 2:3:5' day to second, + interval_day_time(str3) >= interval '1 2:3:4' day to second, + interval_day_time(str4) >= interval '1 2:3:4' day to second, + interval_day_time(str4) > interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:5' day to second, + + interval '1 2:3:4' day to second = interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str4), + interval '1 2:3:4' day to second < interval_day_time(str4), + interval '1 2:3:4' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second > interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str4) +from vector_interval_2 order by str3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1 2:3:4 true true true true true true true true true true true true true true true true true true true true true true true true +PREHOOK: query: explain +select + str3, + -- Should all be false + interval_day_time(str3) != interval_day_time(str3), + interval_day_time(str3) >= interval_day_time(str4), + interval_day_time(str3) > interval_day_time(str4), + interval_day_time(str4) <= interval_day_time(str3), + interval_day_time(str4) < interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str3), + + interval_day_time(str3) != interval '1 2:3:4' day to second, + interval_day_time(str3) >= interval '1 2:3:5' day to second, + interval_day_time(str3) > interval '1 2:3:5' day to second, + interval_day_time(str4) <= interval '1 2:3:4' 
day to second, + interval_day_time(str4) < interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:4' day to second, + + interval '1 2:3:4' day to second != interval_day_time(str3), + interval '1 2:3:4' day to second >= interval_day_time(str4), + interval '1 2:3:4' day to second > interval_day_time(str4), + interval '1 2:3:5' day to second <= interval_day_time(str3), + interval '1 2:3:5' day to second < interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str3) +from vector_interval_2 order by str3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + str3, + -- Should all be false + interval_day_time(str3) != interval_day_time(str3), + interval_day_time(str3) >= interval_day_time(str4), + interval_day_time(str3) > interval_day_time(str4), + interval_day_time(str4) <= interval_day_time(str3), + interval_day_time(str4) < interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str3), + + interval_day_time(str3) != interval '1 2:3:4' day to second, + interval_day_time(str3) >= interval '1 2:3:5' day to second, + interval_day_time(str3) > interval '1 2:3:5' day to second, + interval_day_time(str4) <= interval '1 2:3:4' day to second, + interval_day_time(str4) < interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:4' day to second, + + interval '1 2:3:4' day to second != interval_day_time(str3), + interval '1 2:3:4' day to second >= interval_day_time(str4), + interval '1 2:3:4' day to second > interval_day_time(str4), + interval '1 2:3:5' day to second <= interval_day_time(str3), + interval '1 2:3:5' day to second < interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str3) +from vector_interval_2 order by str3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < 1 02:03:04.000000000) (type: boolean), (1 02:03:04.000000000 <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > 1 02:03:05.000000000) (type: boolean) + outputColumnNames: _col0, _col1, _col10, _col11, _col13, 
_col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str3, + -- Should all be false + interval_day_time(str3) != interval_day_time(str3), + interval_day_time(str3) >= interval_day_time(str4), + interval_day_time(str3) > interval_day_time(str4), + interval_day_time(str4) <= interval_day_time(str3), + interval_day_time(str4) < interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str3), + + interval_day_time(str3) != interval '1 2:3:4' day to second, + interval_day_time(str3) >= interval '1 2:3:5' day to second, + interval_day_time(str3) > interval '1 2:3:5' day to second, + interval_day_time(str4) <= interval '1 2:3:4' day to second, + interval_day_time(str4) < interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:4' day to second, + + interval '1 2:3:4' day to second != interval_day_time(str3), + interval '1 2:3:4' day to second >= interval_day_time(str4), + interval '1 2:3:4' day to second > interval_day_time(str4), + interval '1 2:3:5' day to second <= interval_day_time(str3), + interval '1 2:3:5' day to second < interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str3) +from vector_interval_2 order by str3 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select + str3, + -- Should all be false + interval_day_time(str3) != interval_day_time(str3), + interval_day_time(str3) >= interval_day_time(str4), + interval_day_time(str3) > interval_day_time(str4), + interval_day_time(str4) 
<= interval_day_time(str3), + interval_day_time(str4) < interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str3), + + interval_day_time(str3) != interval '1 2:3:4' day to second, + interval_day_time(str3) >= interval '1 2:3:5' day to second, + interval_day_time(str3) > interval '1 2:3:5' day to second, + interval_day_time(str4) <= interval '1 2:3:4' day to second, + interval_day_time(str4) < interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:4' day to second, + + interval '1 2:3:4' day to second != interval_day_time(str3), + interval '1 2:3:4' day to second >= interval_day_time(str4), + interval '1 2:3:4' day to second > interval_day_time(str4), + interval '1 2:3:5' day to second <= interval_day_time(str3), + interval '1 2:3:5' day to second < interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str3) +from vector_interval_2 order by str3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1 2:3:4 false false false false false false false false false false false false false false false false false false +PREHOOK: query: -- interval expressions in predicates +explain +select ts from vector_interval_2 +where + interval_year_month(str1) = interval_year_month(str1) + and interval_year_month(str1) != interval_year_month(str2) + and interval_year_month(str1) <= interval_year_month(str2) + and interval_year_month(str1) < interval_year_month(str2) + and interval_year_month(str2) >= interval_year_month(str1) + and interval_year_month(str2) > interval_year_month(str1) + + and interval_year_month(str1) = interval '1-2' year to month + and interval_year_month(str1) != interval '1-3' year to month + and interval_year_month(str1) <= interval '1-3' year to month + and interval_year_month(str1) < interval '1-3' year to month + and interval_year_month(str2) >= interval '1-2' year to month + and interval_year_month(str2) > interval '1-2' year to month + + and interval '1-2' year to month = interval_year_month(str1) + and interval '1-2' year to month != interval_year_month(str2) + and interval '1-2' year to month <= interval_year_month(str2) + and interval '1-2' year to month < interval_year_month(str2) + and interval '1-3' year to month >= interval_year_month(str1) + and interval '1-3' year to month > interval_year_month(str1) +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: -- interval expressions in predicates +explain +select ts from vector_interval_2 +where + interval_year_month(str1) = interval_year_month(str1) + and interval_year_month(str1) != interval_year_month(str2) + and interval_year_month(str1) <= interval_year_month(str2) + and interval_year_month(str1) < interval_year_month(str2) + and interval_year_month(str2) >= interval_year_month(str1) + and interval_year_month(str2) > interval_year_month(str1) + + and interval_year_month(str1) = interval '1-2' year to month + and interval_year_month(str1) != interval '1-3' year to month + and interval_year_month(str1) <= interval '1-3' year to month + and interval_year_month(str1) < interval '1-3' year to month + and interval_year_month(str2) >= interval '1-2' year to month + and interval_year_month(str2) > interval '1-2' year to month + + and interval '1-2' year to month = interval_year_month(str1) + and interval '1-2' year to month != interval_year_month(str2) + and interval '1-2' year to month <= interval_year_month(str2) + 
and interval '1-2' year to month < interval_year_month(str2) + and interval '1-3' year to month >= interval_year_month(str1) + and interval '1-3' year to month > interval_year_month(str1) +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH))) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH))) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH))) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH))) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH))) and (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2)) and (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH))) and (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH))) and (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH))) and (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH))) and (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH))) and (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + interval_year_month(str1) = interval_year_month(str1) + and interval_year_month(str1) != interval_year_month(str2) + and interval_year_month(str1) <= interval_year_month(str2) + and interval_year_month(str1) < interval_year_month(str2) + and interval_year_month(str2) >= interval_year_month(str1) + and interval_year_month(str2) > interval_year_month(str1) + + and interval_year_month(str1) = interval '1-2' year to month + and interval_year_month(str1) != interval '1-3' year to month + and interval_year_month(str1) <= interval '1-3' year to month + and interval_year_month(str1) < interval '1-3' year to month + and interval_year_month(str2) >= 
interval '1-2' year to month + and interval_year_month(str2) > interval '1-2' year to month + + and interval '1-2' year to month = interval_year_month(str1) + and interval '1-2' year to month != interval_year_month(str2) + and interval '1-2' year to month <= interval_year_month(str2) + and interval '1-2' year to month < interval_year_month(str2) + and interval '1-3' year to month >= interval_year_month(str1) + and interval '1-3' year to month > interval_year_month(str1) +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + interval_year_month(str1) = interval_year_month(str1) + and interval_year_month(str1) != interval_year_month(str2) + and interval_year_month(str1) <= interval_year_month(str2) + and interval_year_month(str1) < interval_year_month(str2) + and interval_year_month(str2) >= interval_year_month(str1) + and interval_year_month(str2) > interval_year_month(str1) + + and interval_year_month(str1) = interval '1-2' year to month + and interval_year_month(str1) != interval '1-3' year to month + and interval_year_month(str1) <= interval '1-3' year to month + and interval_year_month(str1) < interval '1-3' year to month + and interval_year_month(str2) >= interval '1-2' year to month + and interval_year_month(str2) > interval '1-2' year to month + + and interval '1-2' year to month = interval_year_month(str1) + and interval '1-2' year to month != interval_year_month(str2) + and interval '1-2' year to month <= interval_year_month(str2) + and interval '1-2' year to month < interval_year_month(str2) + and interval '1-3' year to month >= interval_year_month(str1) + and interval '1-3' year to month > interval_year_month(str1) +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: explain +select ts from vector_interval_2 +where + interval_day_time(str3) = interval_day_time(str3) + and interval_day_time(str3) != interval_day_time(str4) + and interval_day_time(str3) <= interval_day_time(str4) + and interval_day_time(str3) < interval_day_time(str4) + and interval_day_time(str4) >= interval_day_time(str3) + and interval_day_time(str4) > interval_day_time(str3) + + and interval_day_time(str3) = interval '1 2:3:4' day to second + and interval_day_time(str3) != interval '1 2:3:5' day to second + and interval_day_time(str3) <= interval '1 2:3:5' day to second + and interval_day_time(str3) < interval '1 2:3:5' day to second + and interval_day_time(str4) >= interval '1 2:3:4' day to second + and interval_day_time(str4) > interval '1 2:3:4' day to second + + and interval '1 2:3:4' day to second = interval_day_time(str3) + and interval '1 2:3:4' day to second != interval_day_time(str4) + and interval '1 2:3:4' day to second <= interval_day_time(str4) + and interval '1 2:3:4' day to second < interval_day_time(str4) + and interval '1 2:3:5' day to second >= interval_day_time(str3) + and interval '1 2:3:5' day to second > interval_day_time(str3) +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ts from vector_interval_2 +where + interval_day_time(str3) = interval_day_time(str3) + and interval_day_time(str3) != interval_day_time(str4) + and interval_day_time(str3) <= interval_day_time(str4) + and interval_day_time(str3) < interval_day_time(str4) + and interval_day_time(str4) >= interval_day_time(str3) + and interval_day_time(str4) > interval_day_time(str3) + + and 
interval_day_time(str3) = interval '1 2:3:4' day to second + and interval_day_time(str3) != interval '1 2:3:5' day to second + and interval_day_time(str3) <= interval '1 2:3:5' day to second + and interval_day_time(str3) < interval '1 2:3:5' day to second + and interval_day_time(str4) >= interval '1 2:3:4' day to second + and interval_day_time(str4) > interval '1 2:3:4' day to second + + and interval '1 2:3:4' day to second = interval_day_time(str3) + and interval '1 2:3:4' day to second != interval_day_time(str4) + and interval '1 2:3:4' day to second <= interval_day_time(str4) + and interval '1 2:3:4' day to second < interval_day_time(str4) + and interval '1 2:3:5' day to second >= interval_day_time(str3) + and interval '1 2:3:5' day to second > interval_day_time(str3) +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND))) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND))) and (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND))) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND))) and (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND))) and (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000)) and (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000)) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000)) and (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000)) and (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND))) and (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND))) and (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND))) and (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND))) and (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND))) and (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: 
vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + interval_day_time(str3) = interval_day_time(str3) + and interval_day_time(str3) != interval_day_time(str4) + and interval_day_time(str3) <= interval_day_time(str4) + and interval_day_time(str3) < interval_day_time(str4) + and interval_day_time(str4) >= interval_day_time(str3) + and interval_day_time(str4) > interval_day_time(str3) + + and interval_day_time(str3) = interval '1 2:3:4' day to second + and interval_day_time(str3) != interval '1 2:3:5' day to second + and interval_day_time(str3) <= interval '1 2:3:5' day to second + and interval_day_time(str3) < interval '1 2:3:5' day to second + and interval_day_time(str4) >= interval '1 2:3:4' day to second + and interval_day_time(str4) > interval '1 2:3:4' day to second + + and interval '1 2:3:4' day to second = interval_day_time(str3) + and interval '1 2:3:4' day to second != interval_day_time(str4) + and interval '1 2:3:4' day to second <= interval_day_time(str4) + and interval '1 2:3:4' day to second < interval_day_time(str4) + and interval '1 2:3:5' day to second >= interval_day_time(str3) + and interval '1 2:3:5' day to second > interval_day_time(str3) +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + interval_day_time(str3) = interval_day_time(str3) + and interval_day_time(str3) != interval_day_time(str4) + and interval_day_time(str3) <= interval_day_time(str4) + and interval_day_time(str3) < interval_day_time(str4) + and interval_day_time(str4) >= interval_day_time(str3) + and interval_day_time(str4) > interval_day_time(str3) + + and interval_day_time(str3) = interval '1 2:3:4' day to second + and interval_day_time(str3) != interval '1 2:3:5' day to second + and interval_day_time(str3) <= interval '1 2:3:5' day to second + and interval_day_time(str3) < interval '1 2:3:5' day to second + and interval_day_time(str4) >= interval '1 2:3:4' day to second + and interval_day_time(str4) > interval '1 2:3:4' day to second + + and interval '1 2:3:4' day to second = interval_day_time(str3) + and interval '1 2:3:4' day to second != interval_day_time(str4) + and interval '1 2:3:4' day to second <= interval_day_time(str4) + and interval '1 2:3:4' day to second < interval_day_time(str4) + and interval '1 2:3:5' day to second >= interval_day_time(str3) + and interval '1 2:3:5' day to second > interval_day_time(str3) +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: explain +select ts from vector_interval_2 +where + date '2002-03-01' = dt + interval_year_month(str1) + and date '2002-03-01' <= dt + interval_year_month(str1) + and date '2002-03-01' >= dt + interval_year_month(str1) + and dt + interval_year_month(str1) = date '2002-03-01' + and dt + interval_year_month(str1) <= date '2002-03-01' + and dt + interval_year_month(str1) >= date '2002-03-01' + and dt != dt + interval_year_month(str1) + + and date '2002-03-01' = dt + interval '1-2' year to month + and date '2002-03-01' <= dt + interval '1-2' year to month + and date '2002-03-01' >= dt + interval '1-2' year to month + and dt + interval '1-2' year to month = date '2002-03-01' + and dt + interval '1-2' year to month <= date '2002-03-01' + and dt + interval '1-2' year to month >= date '2002-03-01' + and dt != dt + interval 
'1-2' year to month +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ts from vector_interval_2 +where + date '2002-03-01' = dt + interval_year_month(str1) + and date '2002-03-01' <= dt + interval_year_month(str1) + and date '2002-03-01' >= dt + interval_year_month(str1) + and dt + interval_year_month(str1) = date '2002-03-01' + and dt + interval_year_month(str1) <= date '2002-03-01' + and dt + interval_year_month(str1) >= date '2002-03-01' + and dt != dt + interval_year_month(str1) + + and date '2002-03-01' = dt + interval '1-2' year to month + and date '2002-03-01' <= dt + interval '1-2' year to month + and date '2002-03-01' >= dt + interval '1-2' year to month + and dt + interval '1-2' year to month = date '2002-03-01' + and dt + interval '1-2' year to month <= date '2002-03-01' + and dt + interval '1-2' year to month >= date '2002-03-01' + and dt != dt + interval '1-2' year to month +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((2002-03-01 = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)))) and (2002-03-01 >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)))) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = 2002-03-01)) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= 2002-03-01)) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= 2002-03-01)) and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)))) and (2002-03-01 = (dt + 1-2))) and (2002-03-01 <= (dt + 1-2))) and (2002-03-01 >= (dt + 1-2))) and ((dt + 1-2) = 2002-03-01)) and ((dt + 1-2) <= 2002-03-01)) and ((dt + 1-2) >= 2002-03-01)) and (dt <> (dt + 1-2))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + date '2002-03-01' = dt + interval_year_month(str1) + and date '2002-03-01' <= dt + interval_year_month(str1) + and date '2002-03-01' >= dt + interval_year_month(str1) + and dt + interval_year_month(str1) = date '2002-03-01' + and dt + interval_year_month(str1) <= date '2002-03-01' + and dt + interval_year_month(str1) >= date '2002-03-01' + and dt != dt + interval_year_month(str1) + + and 
date '2002-03-01' = dt + interval '1-2' year to month + and date '2002-03-01' <= dt + interval '1-2' year to month + and date '2002-03-01' >= dt + interval '1-2' year to month + and dt + interval '1-2' year to month = date '2002-03-01' + and dt + interval '1-2' year to month <= date '2002-03-01' + and dt + interval '1-2' year to month >= date '2002-03-01' + and dt != dt + interval '1-2' year to month +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + date '2002-03-01' = dt + interval_year_month(str1) + and date '2002-03-01' <= dt + interval_year_month(str1) + and date '2002-03-01' >= dt + interval_year_month(str1) + and dt + interval_year_month(str1) = date '2002-03-01' + and dt + interval_year_month(str1) <= date '2002-03-01' + and dt + interval_year_month(str1) >= date '2002-03-01' + and dt != dt + interval_year_month(str1) + + and date '2002-03-01' = dt + interval '1-2' year to month + and date '2002-03-01' <= dt + interval '1-2' year to month + and date '2002-03-01' >= dt + interval '1-2' year to month + and dt + interval '1-2' year to month = date '2002-03-01' + and dt + interval '1-2' year to month <= date '2002-03-01' + and dt + interval '1-2' year to month >= date '2002-03-01' + and dt != dt + interval '1-2' year to month +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: explain +select ts from vector_interval_2 +where + timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' <= ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' >= ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' != ts + interval '1-2' year to month + and timestamp '2002-02-01 01:02:03' < ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' > ts + interval '1-2' year to month + + and ts + interval '1-2' year to month = timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month >= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month <= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month != timestamp '2002-04-01 01:02:03' + and ts + interval '1-2' year to month > timestamp '2002-02-01 01:02:03' + and ts + interval '1-2' year to month < timestamp '2002-04-01 01:02:03' + + and ts = ts + interval '0' year + and ts != ts + interval '1' year + and ts <= ts + interval '1' year + and ts < ts + interval '1' year + and ts >= ts - interval '1' year + and ts > ts - interval '1' year +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ts from vector_interval_2 +where + timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' <= ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' >= ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' != ts + interval '1-2' year to month + and timestamp '2002-02-01 01:02:03' < ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' > ts + interval '1-2' year to month + + and ts + interval '1-2' year to month = timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month >= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month <= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month != timestamp '2002-04-01 01:02:03' + and ts + interval '1-2' 
year to month > timestamp '2002-02-01 01:02:03' + and ts + interval '1-2' year to month < timestamp '2002-04-01 01:02:03' + + and ts = ts + interval '0' year + and ts != ts + interval '1' year + and ts <= ts + interval '1' year + and ts < ts + interval '1' year + and ts >= ts - interval '1' year + and ts > ts - interval '1' year +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((2002-03-01 01:02:03.0 = (ts + 1-2)) and (2002-03-01 01:02:03.0 <= (ts + 1-2))) and (2002-03-01 01:02:03.0 >= (ts + 1-2))) and (2002-04-01 01:02:03.0 <> (ts + 1-2))) and (2002-02-01 01:02:03.0 < (ts + 1-2))) and (2002-04-01 01:02:03.0 > (ts + 1-2))) and ((ts + 1-2) = 2002-03-01 01:02:03.0)) and ((ts + 1-2) >= 2002-03-01 01:02:03.0)) and ((ts + 1-2) <= 2002-03-01 01:02:03.0)) and ((ts + 1-2) <> 2002-04-01 01:02:03.0)) and ((ts + 1-2) > 2002-02-01 01:02:03.0)) and ((ts + 1-2) < 2002-04-01 01:02:03.0)) and (ts = (ts + 0-0))) and (ts <> (ts + 1-0))) and (ts <= (ts + 1-0))) and (ts < (ts + 1-0))) and (ts >= (ts - 1-0))) and (ts > (ts - 1-0))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' <= ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' >= ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' != ts + interval '1-2' year to month + and timestamp '2002-02-01 01:02:03' < ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' > ts + interval '1-2' year to month + + and ts + interval '1-2' year to month = timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month >= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month <= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month != timestamp '2002-04-01 01:02:03' + and ts + interval '1-2' year to month > timestamp '2002-02-01 01:02:03' + and ts + interval '1-2' year to month < timestamp '2002-04-01 01:02:03' + + and ts = ts + interval '0' year + and ts != ts + interval '1' year + and ts <= ts + interval '1' 
year + and ts < ts + interval '1' year + and ts >= ts - interval '1' year + and ts > ts - interval '1' year +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' <= ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' >= ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' != ts + interval '1-2' year to month + and timestamp '2002-02-01 01:02:03' < ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' > ts + interval '1-2' year to month + + and ts + interval '1-2' year to month = timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month >= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month <= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month != timestamp '2002-04-01 01:02:03' + and ts + interval '1-2' year to month > timestamp '2002-02-01 01:02:03' + and ts + interval '1-2' year to month < timestamp '2002-04-01 01:02:03' + + and ts = ts + interval '0' year + and ts != ts + interval '1' year + and ts <= ts + interval '1' year + and ts < ts + interval '1' year + and ts >= ts - interval '1' year + and ts > ts - interval '1' year +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: -- day to second expressions in predicate +explain +select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' != dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' <= dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' < dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' >= dt - interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' > dt - interval '0 1:2:4' day to second + + and dt + interval '0 1:2:3' day to second = timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second != timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:3' day to second >= timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second > timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:3' day to second <= timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:4' day to second < timestamp '2001-01-01 01:02:03' + + and ts = dt + interval '0 1:2:3' day to second + and ts != dt + interval '0 1:2:4' day to second + and ts <= dt + interval '0 1:2:3' day to second + and ts < dt + interval '0 1:2:4' day to second + and ts >= dt - interval '0 1:2:3' day to second + and ts > dt - interval '0 1:2:4' day to second +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: -- day to second expressions in predicate +explain +select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' != dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' <= dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' < dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' >= dt - interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' > dt - interval '0 1:2:4' day to second + + and dt + interval '0 1:2:3' day to second = timestamp '2001-01-01 
01:02:03' + and dt + interval '0 1:2:4' day to second != timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:3' day to second >= timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second > timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:3' day to second <= timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:4' day to second < timestamp '2001-01-01 01:02:03' + + and ts = dt + interval '0 1:2:3' day to second + and ts != dt + interval '0 1:2:4' day to second + and ts <= dt + interval '0 1:2:3' day to second + and ts < dt + interval '0 1:2:4' day to second + and ts >= dt - interval '0 1:2:3' day to second + and ts > dt - interval '0 1:2:4' day to second +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((2001-01-01 01:02:03.0 = (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 <> (dt + 0 01:02:04.000000000))) and (2001-01-01 01:02:03.0 <= (dt + 0 01:02:03.000000000))) and (2001-01-01 01:02:03.0 < (dt + 0 01:02:04.000000000))) and (2001-01-01 01:02:03.0 >= (dt - 0 01:02:03.000000000))) and (2001-01-01 01:02:03.0 > (dt - 0 01:02:04.000000000))) and ((dt + 0 01:02:03.000000000) = 2001-01-01 01:02:03.0)) and ((dt + 0 01:02:04.000000000) <> 2001-01-01 01:02:03.0)) and ((dt + 0 01:02:03.000000000) >= 2001-01-01 01:02:03.0)) and ((dt + 0 01:02:04.000000000) > 2001-01-01 01:02:03.0)) and ((dt - 0 01:02:03.000000000) <= 2001-01-01 01:02:03.0)) and ((dt - 0 01:02:04.000000000) < 2001-01-01 01:02:03.0)) and (ts = (dt + 0 01:02:03.000000000))) and (ts <> (dt + 0 01:02:04.000000000))) and (ts <= (dt + 0 01:02:03.000000000))) and (ts < (dt + 0 01:02:04.000000000))) and (ts >= (dt - 0 01:02:03.000000000))) and (ts > (dt - 0 01:02:04.000000000))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' != dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' <= dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' < dt + interval '0 1:2:4' day to second + and 
timestamp '2001-01-01 01:02:03' >= dt - interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' > dt - interval '0 1:2:4' day to second + + and dt + interval '0 1:2:3' day to second = timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second != timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:3' day to second >= timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second > timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:3' day to second <= timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:4' day to second < timestamp '2001-01-01 01:02:03' + + and ts = dt + interval '0 1:2:3' day to second + and ts != dt + interval '0 1:2:4' day to second + and ts <= dt + interval '0 1:2:3' day to second + and ts < dt + interval '0 1:2:4' day to second + and ts >= dt - interval '0 1:2:3' day to second + and ts > dt - interval '0 1:2:4' day to second +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' != dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' <= dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' < dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' >= dt - interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' > dt - interval '0 1:2:4' day to second + + and dt + interval '0 1:2:3' day to second = timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second != timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:3' day to second >= timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second > timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:3' day to second <= timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:4' day to second < timestamp '2001-01-01 01:02:03' + + and ts = dt + interval '0 1:2:3' day to second + and ts != dt + interval '0 1:2:4' day to second + and ts <= dt + interval '0 1:2:3' day to second + and ts < dt + interval '0 1:2:4' day to second + and ts >= dt - interval '0 1:2:3' day to second + and ts > dt - interval '0 1:2:4' day to second +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: explain +select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = ts + interval '0' day + and timestamp '2001-01-01 01:02:03' != ts + interval '1' day + and timestamp '2001-01-01 01:02:03' <= ts + interval '1' day + and timestamp '2001-01-01 01:02:03' < ts + interval '1' day + and timestamp '2001-01-01 01:02:03' >= ts - interval '1' day + and timestamp '2001-01-01 01:02:03' > ts - interval '1' day + + and ts + interval '0' day = timestamp '2001-01-01 01:02:03' + and ts + interval '1' day != timestamp '2001-01-01 01:02:03' + and ts + interval '1' day >= timestamp '2001-01-01 01:02:03' + and ts + interval '1' day > timestamp '2001-01-01 01:02:03' + and ts - interval '1' day <= timestamp '2001-01-01 01:02:03' + and ts - interval '1' day < timestamp '2001-01-01 01:02:03' + + and ts = ts + interval '0' day + and ts != ts + interval '1' day + and ts <= ts + interval '1' day + and ts < ts + interval '1' day + and ts >= ts - interval '1' day + and ts > ts - interval '1' day +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: explain 
+select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = ts + interval '0' day + and timestamp '2001-01-01 01:02:03' != ts + interval '1' day + and timestamp '2001-01-01 01:02:03' <= ts + interval '1' day + and timestamp '2001-01-01 01:02:03' < ts + interval '1' day + and timestamp '2001-01-01 01:02:03' >= ts - interval '1' day + and timestamp '2001-01-01 01:02:03' > ts - interval '1' day + + and ts + interval '0' day = timestamp '2001-01-01 01:02:03' + and ts + interval '1' day != timestamp '2001-01-01 01:02:03' + and ts + interval '1' day >= timestamp '2001-01-01 01:02:03' + and ts + interval '1' day > timestamp '2001-01-01 01:02:03' + and ts - interval '1' day <= timestamp '2001-01-01 01:02:03' + and ts - interval '1' day < timestamp '2001-01-01 01:02:03' + + and ts = ts + interval '0' day + and ts != ts + interval '1' day + and ts <= ts + interval '1' day + and ts < ts + interval '1' day + and ts >= ts - interval '1' day + and ts > ts - interval '1' day +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((2001-01-01 01:02:03.0 = (ts + 0 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <> (ts + 1 00:00:00.000000000))) and (2001-01-01 01:02:03.0 <= (ts + 1 00:00:00.000000000))) and (2001-01-01 01:02:03.0 < (ts + 1 00:00:00.000000000))) and (2001-01-01 01:02:03.0 >= (ts - 1 00:00:00.000000000))) and (2001-01-01 01:02:03.0 > (ts - 1 00:00:00.000000000))) and ((ts + 0 00:00:00.000000000) = 2001-01-01 01:02:03.0)) and ((ts + 1 00:00:00.000000000) <> 2001-01-01 01:02:03.0)) and ((ts + 1 00:00:00.000000000) >= 2001-01-01 01:02:03.0)) and ((ts + 1 00:00:00.000000000) > 2001-01-01 01:02:03.0)) and ((ts - 1 00:00:00.000000000) <= 2001-01-01 01:02:03.0)) and ((ts - 1 00:00:00.000000000) < 2001-01-01 01:02:03.0)) and (ts = (ts + 0 00:00:00.000000000))) and (ts <> (ts + 1 00:00:00.000000000))) and (ts <= (ts + 1 00:00:00.000000000))) and (ts < (ts + 1 00:00:00.000000000))) and (ts >= (ts - 1 00:00:00.000000000))) and (ts > (ts - 1 00:00:00.000000000))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + timestamp '2001-01-01 
01:02:03' = ts + interval '0' day + and timestamp '2001-01-01 01:02:03' != ts + interval '1' day + and timestamp '2001-01-01 01:02:03' <= ts + interval '1' day + and timestamp '2001-01-01 01:02:03' < ts + interval '1' day + and timestamp '2001-01-01 01:02:03' >= ts - interval '1' day + and timestamp '2001-01-01 01:02:03' > ts - interval '1' day + + and ts + interval '0' day = timestamp '2001-01-01 01:02:03' + and ts + interval '1' day != timestamp '2001-01-01 01:02:03' + and ts + interval '1' day >= timestamp '2001-01-01 01:02:03' + and ts + interval '1' day > timestamp '2001-01-01 01:02:03' + and ts - interval '1' day <= timestamp '2001-01-01 01:02:03' + and ts - interval '1' day < timestamp '2001-01-01 01:02:03' + + and ts = ts + interval '0' day + and ts != ts + interval '1' day + and ts <= ts + interval '1' day + and ts < ts + interval '1' day + and ts >= ts - interval '1' day + and ts > ts - interval '1' day +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = ts + interval '0' day + and timestamp '2001-01-01 01:02:03' != ts + interval '1' day + and timestamp '2001-01-01 01:02:03' <= ts + interval '1' day + and timestamp '2001-01-01 01:02:03' < ts + interval '1' day + and timestamp '2001-01-01 01:02:03' >= ts - interval '1' day + and timestamp '2001-01-01 01:02:03' > ts - interval '1' day + + and ts + interval '0' day = timestamp '2001-01-01 01:02:03' + and ts + interval '1' day != timestamp '2001-01-01 01:02:03' + and ts + interval '1' day >= timestamp '2001-01-01 01:02:03' + and ts + interval '1' day > timestamp '2001-01-01 01:02:03' + and ts - interval '1' day <= timestamp '2001-01-01 01:02:03' + and ts - interval '1' day < timestamp '2001-01-01 01:02:03' + + and ts = ts + interval '0' day + and ts != ts + interval '1' day + and ts <= ts + interval '1' day + and ts < ts + interval '1' day + and ts >= ts - interval '1' day + and ts > ts - interval '1' day +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: drop table vector_interval_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@vector_interval_2 +PREHOOK: Output: default@vector_interval_2 +POSTHOOK: query: drop table vector_interval_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@vector_interval_2 +POSTHOOK: Output: default@vector_interval_2 diff --git ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out new file mode 100644 index 0000000..b54f496 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: create table orc1 + stored as orc + tblproperties("orc.compress"="ZLIB") + as + select rn + from + ( + select * from (select cast(1 as int) as rn from src limit 1)a + union all + select * from (select cast(100 as int) as rn from src limit 1)b + union all + select * from (select cast(10000 as int) as rn from src limit 1)c + ) t +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@orc1 +POSTHOOK: query: create table orc1 + stored as orc + tblproperties("orc.compress"="ZLIB") + as + select rn + from + ( + select * from (select cast(1 as int) as rn from src limit 1)a + union all + select * from (select cast(100 as int) as rn from src limit 1)b + union all + 
select * from (select cast(10000 as int) as rn from src limit 1)c + ) t +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc1 +PREHOOK: query: create table orc_rn1 (rn int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_rn1 +POSTHOOK: query: create table orc_rn1 (rn int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_rn1 +PREHOOK: query: create table orc_rn2 (rn int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_rn2 +POSTHOOK: query: create table orc_rn2 (rn int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_rn2 +PREHOOK: query: create table orc_rn3 (rn int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_rn3 +POSTHOOK: query: create table orc_rn3 (rn int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_rn3 +PREHOOK: query: explain from orc1 a +insert overwrite table orc_rn1 select a.* where a.rn < 100 +insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 +insert overwrite table orc_rn3 select a.* where a.rn >= 1000 +PREHOOK: type: QUERY +POSTHOOK: query: explain from orc1 a +insert overwrite table orc_rn1 select a.* where a.rn < 100 +insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 +insert overwrite table orc_rn3 select a.* where a.rn >= 1000 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 154 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (rn < 100) (type: boolean) + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rn (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn1 + Filter Operator + predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rn (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn2 + Filter Operator + predicate: (rn >= 1000) (type: boolean) + Statistics: Num rows: 51 Data size: 204 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rn (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn3 + Execution mode: vectorized + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn2 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn3 + + Stage: Stage-7 + Stats-Aggr Operator + +PREHOOK: query: from orc1 a +insert overwrite table orc_rn1 select a.* where a.rn < 100 +insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 +insert overwrite table orc_rn3 select a.* where a.rn >= 1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc1 +PREHOOK: Output: default@orc_rn1 +PREHOOK: Output: default@orc_rn2 +PREHOOK: Output: default@orc_rn3 +POSTHOOK: query: from orc1 a +insert overwrite table orc_rn1 select a.* where a.rn < 100 +insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 +insert overwrite table orc_rn3 select a.* where a.rn >= 1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc1 +POSTHOOK: Output: default@orc_rn1 +POSTHOOK: Output: default@orc_rn2 +POSTHOOK: Output: default@orc_rn3 +POSTHOOK: Lineage: orc_rn1.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ] +POSTHOOK: Lineage: orc_rn2.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ] +POSTHOOK: Lineage: orc_rn3.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ] +PREHOOK: query: select * from orc_rn1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_rn1 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_rn1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_rn1 +#### A masked pattern was here #### +1 +PREHOOK: query: select * from orc_rn2 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_rn2 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_rn2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_rn2 +#### A masked pattern was here #### +100 +PREHOOK: query: select * from orc_rn3 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_rn3 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_rn3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_rn3 +#### A masked pattern was here #### +10000 diff --git 
serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java index ccdb820..a6ab7a7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java @@ -132,7 +132,7 @@ boolean[] columnSortOrderIsDesc; private static byte[] decimalBuffer = null; - private static Charset decimalCharSet = Charset.forName("US-ASCII"); + public static Charset decimalCharSet = Charset.forName("US-ASCII"); @Override public void initialize(Configuration conf, Properties tbl) @@ -572,7 +572,7 @@ static int getCharacterMaxLength(TypeInfo type) { return ((BaseCharTypeInfo)type).getLength(); } - static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r) + public static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r) throws IOException { // Get the actual length first int start = buffer.tell(); @@ -636,7 +636,7 @@ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDe return serializeBytesWritable; } - private static void writeByte(RandomAccessOutput buffer, byte b, boolean invert) { + public static void writeByte(RandomAccessOutput buffer, byte b, boolean invert) { if (invert) { b = (byte) (0xff ^ b); } @@ -892,7 +892,7 @@ static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, } - private static void serializeBytes( + public static void serializeBytes( ByteStream.Output buffer, byte[] data, int length, boolean invert) { for (int i = 0; i < length; i++) { if (data[i] == 0 || data[i] == 1) { @@ -905,14 +905,27 @@ private static void serializeBytes( writeByte(buffer, (byte) 0, invert); } - private static void serializeInt(ByteStream.Output buffer, int v, boolean invert) { + public static void serializeBytes( + ByteStream.Output buffer, byte[] data, int offset, int length, boolean invert) { + for (int i = offset; i < offset + length; i++) { + if (data[i] == 0 || data[i] == 1) { + writeByte(buffer, (byte) 1, invert); + writeByte(buffer, (byte) (data[i] + 1), invert); + } else { + writeByte(buffer, data[i], invert); + } + } + writeByte(buffer, (byte) 0, invert); + } + + public static void serializeInt(ByteStream.Output buffer, int v, boolean invert) { writeByte(buffer, (byte) ((v >> 24) ^ 0x80), invert); writeByte(buffer, (byte) (v >> 16), invert); writeByte(buffer, (byte) (v >> 8), invert); writeByte(buffer, (byte) v, invert); } - private static void serializeLong(ByteStream.Output buffer, long v, boolean invert) { + public static void serializeLong(ByteStream.Output buffer, long v, boolean invert) { writeByte(buffer, (byte) ((v >> 56) ^ 0x80), invert); writeByte(buffer, (byte) (v >> 48), invert); writeByte(buffer, (byte) (v >> 40), invert); diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/InputByteBuffer.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/InputByteBuffer.java index 300d535..41d48a0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/InputByteBuffer.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/InputByteBuffer.java @@ -78,6 +78,10 @@ public final int getEnd() { return end; } + public final boolean isEof() { + return (start >= end); + } + /** * Returns the underlying byte array. 
*/ diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java new file mode 100644 index 0000000..2b6d9c0 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java @@ -0,0 +1,746 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.binarysortable.fast; + +import java.io.EOFException; +import java.io.IOException; +import java.math.BigInteger; +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.InputByteBuffer; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Text; + +/* + * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format. + * + * The caller is responsible for calling the read method for the right type of each field + * (after calling readCheckNull). + * + * Reading some fields require a results object to receive value information. A separate + * results object is created by the caller at initialization per different field even for the same + * type. + * + * Some type values are by reference to either bytes in the deserialization buffer or to + * other type specific buffers. So, those references are only valid until the next time set is + * called. 
+ */ +public class BinarySortableDeserializeRead implements DeserializeRead { + public static final Log LOG = LogFactory.getLog(BinarySortableDeserializeRead.class.getName()); + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + // The sort order (ascending/descending) for each field. Set to true when descending (invert). + private boolean[] columnSortOrderIsDesc; + + // Which field we are on. We start with -1 so readCheckNull can increment once and the read + // field data methods don't increment. + private int fieldIndex; + + private int fieldCount; + + private int start; + + private DecimalTypeInfo saveDecimalTypeInfo; + private HiveDecimal saveDecimal; + + private byte[] tempDecimalBuffer; + private HiveDecimalWritable tempHiveDecimalWritable; + + private boolean readBeyondConfiguredFieldsWarned; + private boolean readBeyondBufferRangeWarned; + private boolean bufferRangeHasExtraDataWarned; + + private InputByteBuffer inputByteBuffer = new InputByteBuffer(); + + /* + * Use this constructor when only ascending sort order is used. + */ + public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos) { + this(primitiveTypeInfos, null); + } + + public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos, + boolean[] columnSortOrderIsDesc) { + this.primitiveTypeInfos = primitiveTypeInfos; + fieldCount = primitiveTypeInfos.length; + if (columnSortOrderIsDesc != null) { + this.columnSortOrderIsDesc = columnSortOrderIsDesc; + } else { + this.columnSortOrderIsDesc = new boolean[primitiveTypeInfos.length]; + Arrays.fill(this.columnSortOrderIsDesc, false); + } + inputByteBuffer = new InputByteBuffer(); + readBeyondConfiguredFieldsWarned = false; + readBeyondBufferRangeWarned = false; + bufferRangeHasExtraDataWarned = false; + } + + // Not public since we must have column information. + private BinarySortableDeserializeRead() { + } + + /* + * The primitive type information for all fields. + */ + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + /* + * Set the range of bytes to be deserialized. + */ + @Override + public void set(byte[] bytes, int offset, int length) { + fieldIndex = -1; + inputByteBuffer.reset(bytes, offset, offset + length); + start = offset; + } + + /* + * Reads the NULL information for a field. + * + * @return Returns true when the field is NULL; reading is positioned to the next field. + * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. + */ + @Override + public boolean readCheckNull() throws IOException { + + // We start with fieldIndex as -1 so we can increment once here and then the read + // field data methods don't increment. + fieldIndex++; + + if (fieldIndex >= fieldCount) { + // Reading beyond the specified field count produces NULL. + if (!readBeyondConfiguredFieldsWarned) { + // Warn only once. + LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " + + " reading more (NULLs returned). Ignoring similar problems."); + readBeyondConfiguredFieldsWarned = true; + } + return true; + } + if (inputByteBuffer.isEof()) { + // Also, reading beyond our byte range produces NULL. + if (!readBeyondBufferRangeWarned) { + // Warn only once. + int length = inputByteBuffer.tell() - start; + LOG.info("Reading beyond buffer range! Buffer range " + start + + " for length " + length + " but reading more... 
" + + "(total buffer length " + inputByteBuffer.getData().length + ")" + + " Ignoring similar problems."); + readBeyondBufferRangeWarned = true; + } + // We cannot read beyond so we must return NULL here. + return true; + } + byte isNull = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]); + + if (isNull == 0) { + return true; + } + + // We have a field and are positioned to it. + + if (primitiveTypeInfos[fieldIndex].getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { + return false; + } + + // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read + // it here. + return earlyReadHiveDecimal(); + } + + /* + * Call this method after all fields have been read to check for extra fields. + */ + public void extraFieldsCheck() { + if (!inputByteBuffer.isEof()) { + // We did not consume all of the byte range. + if (!bufferRangeHasExtraDataWarned) { + // Warn only once. + int length = inputByteBuffer.getEnd() - start; + int remaining = inputByteBuffer.getEnd() - inputByteBuffer.tell(); + LOG.info("Not all fields were read in the buffer range! Buffer range " + start + + " for length " + length + " but " + remaining + " bytes remain. " + + "(total buffer length " + inputByteBuffer.getData().length + ")" + + " Ignoring similar problems."); + bufferRangeHasExtraDataWarned = true; + } + } + } + + /* + * Read integrity warning flags. + */ + @Override + public boolean readBeyondConfiguredFieldsWarned() { + return readBeyondConfiguredFieldsWarned; + } + @Override + public boolean readBeyondBufferRangeWarned() { + return readBeyondBufferRangeWarned; + } + @Override + public boolean bufferRangeHasExtraDataWarned() { + return bufferRangeHasExtraDataWarned; + } + + /* + * BOOLEAN. + */ + @Override + public boolean readBoolean() throws IOException { + byte b = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]); + return (b == 2); + } + + /* + * BYTE. + */ + @Override + public byte readByte() throws IOException { + return (byte) (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) ^ 0x80); + } + + /* + * SHORT. + */ + @Override + public short readShort() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + return (short) v; + } + + /* + * INT. + */ + @Override + public int readInt() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + return v; + } + + /* + * LONG. + */ + @Override + public long readLong() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 7; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + return v; + } + + /* + * FLOAT. + */ + @Override + public float readFloat() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = 0; + for (int i = 0; i < 4; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + if ((v & (1 << 31)) == 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1 << 31); + } + return Float.intBitsToFloat(v); + } + + /* + * DOUBLE. 
+ */ + @Override + public double readDouble() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long v = 0; + for (int i = 0; i < 8; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + if ((v & (1L << 63)) == 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1L << 63); + } + return Double.longBitsToDouble(v); + } + + // This class is for internal use. + private static class BinarySortableReadStringResults extends ReadStringResults { + + // Use an org.apache.hadoop.io.Text object as a buffer to decode the BinarySortable + // format string into. + private Text text; + + public BinarySortableReadStringResults() { + super(); + text = new Text(); + } + } + + // Reading a STRING field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadStringResults createReadStringResults() { + return new BinarySortableReadStringResults(); + } + + + @Override + public void readString(ReadStringResults readStringResults) throws IOException { + BinarySortableReadStringResults binarySortableReadStringResults = + (BinarySortableReadStringResults) readStringResults; + + BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], binarySortableReadStringResults.text); + readStringResults.bytes = binarySortableReadStringResults.text.getBytes(); + readStringResults.start = 0; + readStringResults.length = binarySortableReadStringResults.text.getLength(); + } + + + /* + * CHAR. + */ + + // This class is for internal use. + private static class BinarySortableReadHiveCharResults extends ReadHiveCharResults { + + public BinarySortableReadHiveCharResults() { + super(); + } + + public HiveCharWritable getHiveCharWritable() { + return hiveCharWritable; + } + } + + // Reading a CHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different CHAR field. + @Override + public ReadHiveCharResults createReadHiveCharResults() { + return new BinarySortableReadHiveCharResults(); + } + + public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { + BinarySortableReadHiveCharResults binarySortableReadHiveCharResults = + (BinarySortableReadHiveCharResults) readHiveCharResults; + + if (!binarySortableReadHiveCharResults.isInit()) { + binarySortableReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + HiveCharWritable hiveCharWritable = binarySortableReadHiveCharResults.getHiveCharWritable(); + + // Decode the bytes into our Text buffer, then truncate. + BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveCharWritable.getTextValue()); + hiveCharWritable.enforceMaxLength(binarySortableReadHiveCharResults.getMaxLength()); + + readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); + readHiveCharResults.start = 0; + readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); + } + + /* + * VARCHAR. + */ + + // This class is for internal use. 
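+  // The VARCHAR reader below mirrors the CHAR reader above: the field is decoded into a
+  // Text buffer and then enforceMaxLength() truncates it to the type's declared maximum.
+  // A minimal caller-side sketch of that pattern (the deserializeRead variable name is
+  // hypothetical):
+  //
+  //   ReadHiveVarcharResults varcharResults = deserializeRead.createReadHiveVarcharResults();
+  //   if (!deserializeRead.readCheckNull()) {
+  //     deserializeRead.readHiveVarchar(varcharResults);
+  //     // varcharResults.bytes/start/length reference the truncated value and remain valid
+  //     // only until the next call to set().
+  //   }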
+ private static class BinarySortableReadHiveVarcharResults extends ReadHiveVarcharResults { + + public BinarySortableReadHiveVarcharResults() { + super(); + } + + public HiveVarcharWritable getHiveVarcharWritable() { + return hiveVarcharWritable; + } + } + + // Reading a VARCHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different VARCHAR field. + @Override + public ReadHiveVarcharResults createReadHiveVarcharResults() { + return new BinarySortableReadHiveVarcharResults(); + } + + public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { + BinarySortableReadHiveVarcharResults binarySortableReadHiveVarcharResults = (BinarySortableReadHiveVarcharResults) readHiveVarcharResults; + + if (!binarySortableReadHiveVarcharResults.isInit()) { + binarySortableReadHiveVarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + HiveVarcharWritable hiveVarcharWritable = binarySortableReadHiveVarcharResults.getHiveVarcharWritable(); + + // Decode the bytes into our Text buffer, then truncate. + BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveVarcharWritable.getTextValue()); + hiveVarcharWritable.enforceMaxLength(binarySortableReadHiveVarcharResults.getMaxLength()); + + readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); + readHiveVarcharResults.start = 0; + readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); + } + + /* + * BINARY. + */ + + // This class is for internal use. + private static class BinarySortableReadBinaryResults extends ReadBinaryResults { + + // Use an org.apache.hadoop.io.Text object as a buffer to decode the BinarySortable + // format string into. + private Text text; + + public BinarySortableReadBinaryResults() { + super(); + text = new Text(); + } + } + + // Reading a BINARY field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadBinaryResults createReadBinaryResults() { + return new BinarySortableReadBinaryResults(); + } + + @Override + public void readBinary(ReadBinaryResults readBinaryResults) throws IOException { + BinarySortableReadBinaryResults binarySortableReadBinaryResults = + (BinarySortableReadBinaryResults) readBinaryResults; + + BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], binarySortableReadBinaryResults.text); + readBinaryResults.bytes = binarySortableReadBinaryResults.text.getBytes(); + readBinaryResults.start = 0; + readBinaryResults.length = binarySortableReadBinaryResults.text.getLength(); + } + + /* + * DATE. + */ + + // This class is for internal use. + private static class BinarySortableReadDateResults extends ReadDateResults { + + public BinarySortableReadDateResults() { + super(); + } + + public DateWritable getDateWritable() { + return dateWritable; + } + } + + // Reading a DATE field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DATE field. 
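+  //
+  // A minimal (hypothetical) caller sketch, where "deserializeRead" and "dateResults" are
+  // placeholder names and the current field is assumed to be a DATE:
+  //
+  //   ReadDateResults dateResults = deserializeRead.createReadDateResults();
+  //   ...
+  //   if (!deserializeRead.readCheckNull()) {
+  //     deserializeRead.readDate(dateResults);
+  //     int days = dateResults.getDays();
+  //   }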
+ @Override + public ReadDateResults createReadDateResults() { + return new BinarySortableReadDateResults(); + } + + @Override + public void readDate(ReadDateResults readDateResults) throws IOException { + BinarySortableReadDateResults binarySortableReadDateResults = (BinarySortableReadDateResults) readDateResults; + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + DateWritable dateWritable = binarySortableReadDateResults.getDateWritable(); + dateWritable.set(v); + } + + /* + * TIMESTAMP. + */ + + // This class is for internal use. + private static class BinarySortableReadTimestampResults extends ReadTimestampResults { + + private byte[] timestampBytes; + + public BinarySortableReadTimestampResults() { + super(); + timestampBytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH]; + } + + public TimestampWritable getTimestampWritable() { + return timestampWritable; + } + } + + // Reading a TIMESTAMP field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different TIMESTAMP field. + @Override + public ReadTimestampResults createReadTimestampResults() { + return new BinarySortableReadTimestampResults(); + } + + @Override + public void readTimestamp(ReadTimestampResults readTimestampResults) throws IOException { + BinarySortableReadTimestampResults binarySortableReadTimestampResults = (BinarySortableReadTimestampResults) readTimestampResults; + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + byte[] timestampBytes = binarySortableReadTimestampResults.timestampBytes; + for (int i = 0; i < timestampBytes.length; i++) { + timestampBytes[i] = inputByteBuffer.read(invert); + } + TimestampWritable timestampWritable = binarySortableReadTimestampResults.getTimestampWritable(); + timestampWritable.setBinarySortable(timestampBytes, 0); + } + + /* + * INTERVAL_YEAR_MONTH. + */ + + // This class is for internal use. + private static class BinarySortableReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { + + public BinarySortableReadIntervalYearMonthResults() { + super(); + } + + public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { + return hiveIntervalYearMonthWritable; + } + } + + // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_YEAR_MONTH field. + @Override + public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { + return new BinarySortableReadIntervalYearMonthResults(); + } + + @Override + public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) + throws IOException { + BinarySortableReadIntervalYearMonthResults binarySortableReadIntervalYearMonthResults = + (BinarySortableReadIntervalYearMonthResults) readIntervalYearMonthResults; + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = + binarySortableReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); + hiveIntervalYearMonthWritable.set(v); + } + + /* + * INTERVAL_DAY_TIME. + */ + + // This class is for internal use. 
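+  // On the wire an INTERVAL_DAY_TIME value is an 8-byte total-seconds field followed by a
+  // 4-byte nanos field, each with its leading sign bit flipped, mirroring
+  // writeHiveIntervalDayTime() in BinarySortableSerializeWrite; readIntervalDayTime() below
+  // decodes both pieces.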
+ private static class BinarySortableReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { + + public BinarySortableReadIntervalDayTimeResults() { + super(); + } + + public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { + return hiveIntervalDayTimeWritable; + } + } + + // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_DAY_TIME field. + @Override + public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { + return new BinarySortableReadIntervalDayTimeResults(); + } + + @Override + public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) + throws IOException { + BinarySortableReadIntervalDayTimeResults binarySortableReadIntervalDayTimeResults = + (BinarySortableReadIntervalDayTimeResults) readIntervalDayTimeResults; + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long totalSecs = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 7; i++) { + totalSecs = (totalSecs << 8) + (inputByteBuffer.read(invert) & 0xff); + } + int nanos = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + nanos = (nanos << 8) + (inputByteBuffer.read(invert) & 0xff); + } + HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = + binarySortableReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); + hiveIntervalDayTimeWritable.set(totalSecs, nanos); + } + + /* + * DECIMAL. + */ + + // This class is for internal use. + private static class BinarySortableReadDecimalResults extends ReadDecimalResults { + + public HiveDecimal hiveDecimal; + + public BinarySortableReadDecimalResults() { + super(); + } + + @Override + public void init(DecimalTypeInfo decimalTypeInfo) { + super.init(decimalTypeInfo); + } + + @Override + public HiveDecimal getHiveDecimal() { + return hiveDecimal; + } + } + + // Reading a DECIMAL field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DECIMAL field. + @Override + public ReadDecimalResults createReadDecimalResults() { + return new BinarySortableReadDecimalResults(); + } + + @Override + public void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException { + BinarySortableReadDecimalResults binarySortableReadDecimalResults = + (BinarySortableReadDecimalResults) readDecimalResults; + + if (!binarySortableReadDecimalResults.isInit()) { + binarySortableReadDecimalResults.init(saveDecimalTypeInfo); + } + + binarySortableReadDecimalResults.hiveDecimal = saveDecimal; + + saveDecimal = null; + saveDecimalTypeInfo = null; + } + + /** + * We read the whole HiveDecimal value and then enforce precision and scale, which may + * make it a NULL. + * @return Returns true if this HiveDecimal enforced to a NULL. + * @throws IOException + */ + private boolean earlyReadHiveDecimal() throws IOException { + + // Since enforcing precision and scale can cause a HiveDecimal to become NULL, + // we must read it, enforce it here, and either return NULL or buffer the result. 
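+    //
+    // The field layout matches writeHiveDecimal() in BinarySortableSerializeWrite: a one-byte
+    // sign (0, 1 or 2 for negative, zero, positive), a 4-byte factor giving the position of the
+    // decimal point, and the decimal digits terminated by a marker byte (the digit bytes are
+    // bitwise inverted for negative values so ordering still sorts correctly).
+    // For example, 0.00123 is written as sign 2, factor -2, digits "123".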
+ + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int b = inputByteBuffer.read(invert) - 1; + assert (b == 1 || b == -1 || b == 0); + boolean positive = b != -1; + + int factor = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + factor = (factor << 8) + (inputByteBuffer.read(invert) & 0xff); + } + + if (!positive) { + factor = -factor; + } + + int start = inputByteBuffer.tell(); + int length = 0; + + do { + b = inputByteBuffer.read(positive ? invert : !invert); + assert(b != 1); + + if (b == 0) { + // end of digits + break; + } + + length++; + } while (true); + + if(tempDecimalBuffer == null || tempDecimalBuffer.length < length) { + tempDecimalBuffer = new byte[length]; + } + + inputByteBuffer.seek(start); + for (int i = 0; i < length; ++i) { + tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert); + } + + // read the null byte again + inputByteBuffer.read(positive ? invert : !invert); + + String digits = new String(tempDecimalBuffer, 0, length, BinarySortableSerDe.decimalCharSet); + BigInteger bi = new BigInteger(digits); + HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor-length); + + if (!positive) { + bd = bd.negate(); + } + + // We have a decimal. After we enforce precision and scale, will it become a NULL? + + if (tempHiveDecimalWritable == null) { + tempHiveDecimalWritable = new HiveDecimalWritable(); + } + tempHiveDecimalWritable.set(bd); + + saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex]; + + int precision = saveDecimalTypeInfo.getPrecision(); + int scale = saveDecimalTypeInfo.getScale(); + + saveDecimal = tempHiveDecimalWritable.getHiveDecimal(precision, scale); + + // Now return whether it is NULL or NOT NULL. + return (saveDecimal == null); + } +} \ No newline at end of file diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java new file mode 100644 index 0000000..f62def8 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java @@ -0,0 +1,448 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.binarysortable.fast; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.InputByteBuffer; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalDayTime; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalYearMonth; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hive.common.util.DateUtils; + +/* + * Directly serialize, field-by-field, the BinarySortable format. + * + * This is an alternative way to serialize than what is provided by BinarySortableSerDe. + */ +public class BinarySortableSerializeWrite implements SerializeWrite { + public static final Log LOG = LogFactory.getLog(BinarySortableSerializeWrite.class.getName()); + + private Output output; + + // The sort order (ascending/descending) for each field. Set to true when descending (invert). + private boolean[] columnSortOrderIsDesc; + + // Which field we are on. We start with -1 to be consistent in style with + // BinarySortableDeserializeRead. + private int index; + + private int fieldCount; + + private TimestampWritable tempTimestampWritable; + + public BinarySortableSerializeWrite(boolean[] columnSortOrderIsDesc) { + this(); + fieldCount = columnSortOrderIsDesc.length; + this.columnSortOrderIsDesc = columnSortOrderIsDesc; + } + + /* + * Use this constructor when only ascending sort order is used. + */ + public BinarySortableSerializeWrite(int fieldCount) { + this(); + this.fieldCount = fieldCount; + columnSortOrderIsDesc = new boolean[fieldCount]; + Arrays.fill(columnSortOrderIsDesc, false); + } + + // Not public since we must have the field count or column sort order information. + private BinarySortableSerializeWrite() { + tempTimestampWritable = new TimestampWritable(); + } + + /* + * Set the buffer that will receive the serialized data. + */ + @Override + public void set(Output output) { + this.output = output; + this.output.reset(); + index = -1; + } + + /* + * Reset the previously supplied buffer that will receive the serialized data. + */ + @Override + public void reset() { + output.reset(); + index = -1; + } + + /* + * Write a NULL field. + */ + @Override + public void writeNull() throws IOException { + BinarySortableSerDe.writeByte(output, (byte) 0, columnSortOrderIsDesc[++index]); + } + + /* + * BOOLEAN. + */ + @Override + public void writeBoolean(boolean v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.writeByte(output, (byte) (v ? 2 : 1), invert); + } + + /* + * BYTE. 
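+   *
+   * As with the wider integer types below, the non-NULL marker byte (1) is written first and the
+   * value is stored with its sign bit flipped (v ^ 0x80) so that unsigned byte comparison matches
+   * signed numeric order.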
+ */ + @Override + public void writeByte(byte v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.writeByte(output, (byte) (v ^ 0x80), invert); + } + + /* + * SHORT. + */ + @Override + public void writeShort(short v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.writeByte(output, (byte) ((v >> 8) ^ 0x80), invert); + BinarySortableSerDe.writeByte(output, (byte) v, invert); + } + + /* + * INT. + */ + @Override + public void writeInt(int v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeInt(output, v, invert); + } + + /* + * LONG. + */ + @Override + public void writeLong(long v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.writeByte(output, (byte) ((v >> 56) ^ 0x80), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 48), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 40), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 32), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 24), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 16), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 8), invert); + BinarySortableSerDe.writeByte(output, (byte) v, invert); + + } + + /* + * FLOAT. + */ + @Override + public void writeFloat(float vf) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + int v = Float.floatToIntBits(vf); + if ((v & (1 << 31)) != 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1 << 31); + } + BinarySortableSerDe.writeByte(output, (byte) (v >> 24), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 16), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 8), invert); + BinarySortableSerDe.writeByte(output, (byte) v, invert); + } + + /* + * DOUBLE. + */ + @Override + public void writeDouble(double vd) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + long v = Double.doubleToLongBits(vd); + if ((v & (1L << 63)) != 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1L << 63); + } + BinarySortableSerDe.writeByte(output, (byte) (v >> 56), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 48), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 40), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 32), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 24), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 16), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 8), invert); + BinarySortableSerDe.writeByte(output, (byte) v, invert); + } + + /* + * STRING. + * + * Can be used to write CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. 
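+   *
+   * A minimal (hypothetical) usage sketch for two fields, an INT and a STRING; "output" is a
+   * caller-supplied ByteStream.Output and the values are placeholders:
+   *
+   *   BinarySortableSerializeWrite serializeWrite = new BinarySortableSerializeWrite(2);
+   *   serializeWrite.set(output);
+   *   serializeWrite.writeInt(42);
+   *   serializeWrite.writeString("hello".getBytes());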
+ */ + @Override + public void writeString(byte[] v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeBytes(output, v, 0, v.length, invert); + } + + @Override + public void writeString(byte[] v, int start, int length) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeBytes(output, v, start, length, invert); + } + + /* + * CHAR. + */ + @Override + public void writeHiveChar(HiveChar hiveChar) throws IOException { + String string = hiveChar.getStrippedValue(); + byte[] bytes = string.getBytes(); + writeString(bytes); + } + + /* + * VARCHAR. + */ + @Override + public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException { + String string = hiveVarchar.getValue(); + byte[] bytes = string.getBytes(); + writeString(bytes); + } + + /* + * BINARY. + */ + @Override + public void writeBinary(byte[] v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeBytes(output, v, 0, v.length, invert); + } + + @Override + public void writeBinary(byte[] v, int start, int length) { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeBytes(output, v, start, length, invert); + } + + /* + * DATE. + */ + @Override + public void writeDate(Date date) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeInt(output, DateWritable.dateToDays(date), invert); + } + + // We provide a faster way to write a date without a Date object. + @Override + public void writeDate(int dateAsDays) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeInt(output, dateAsDays, invert); + } + + /* + * TIMESTAMP. + */ + @Override + public void writeTimestamp(Timestamp vt) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + tempTimestampWritable.set(vt); + byte[] data = tempTimestampWritable.getBinarySortable(); + for (int i = 0; i < data.length; i++) { + BinarySortableSerDe.writeByte(output, data[i], invert); + } + } + + /* + * INTERVAL_YEAR_MONTH. + */ + @Override + public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + int totalMonths = viyt.getTotalMonths(); + BinarySortableSerDe.serializeInt(output, totalMonths, invert); + } + + @Override + public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. 
+    BinarySortableSerDe.writeByte(output, (byte) 1, invert);
+
+    BinarySortableSerDe.serializeInt(output, totalMonths, invert);
+  }
+
+  /*
+   * INTERVAL_DAY_TIME.
+   */
+  @Override
+  public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException {
+    final boolean invert = columnSortOrderIsDesc[++index];
+
+    // This field is not a null.
+    BinarySortableSerDe.writeByte(output, (byte) 1, invert);
+
+    long totalSecs = vidt.getTotalSeconds();
+    int nanos = vidt.getNanos();
+    BinarySortableSerDe.serializeLong(output, totalSecs, invert);
+    BinarySortableSerDe.serializeInt(output, nanos, invert);
+  }
+
+  @Override
+  public void writeHiveIntervalDayTime(long totalNanos) throws IOException {
+    final boolean invert = columnSortOrderIsDesc[++index];
+
+    // This field is not a null.
+    BinarySortableSerDe.writeByte(output, (byte) 1, invert);
+
+    long totalSecs = DateUtils.getIntervalDayTimeTotalSecondsFromTotalNanos(totalNanos);
+    int nanos = DateUtils.getIntervalDayTimeNanosFromTotalNanos(totalNanos);
+    BinarySortableSerDe.serializeLong(output, totalSecs, invert);
+    BinarySortableSerDe.serializeInt(output, nanos, invert);
+  }
+
+  /*
+   * DECIMAL.
+   */
+  @Override
+  public void writeHiveDecimal(HiveDecimal dec) throws IOException {
+    final boolean invert = columnSortOrderIsDesc[++index];
+
+    // This field is not a null.
+    BinarySortableSerDe.writeByte(output, (byte) 1, invert);
+
+    // Decimals are encoded in three pieces:
+    //   sign:   one byte; 0, 1 or 2 for smaller than, equal to, or larger than zero respectively.
+    //   factor: a 4-byte number that indicates how many digits the decimal point must be moved
+    //           left or right until the resulting number is smaller than one but has something
+    //           other than 0 as its first digit.
+    //   digits: a string of all the digits in the decimal. If the number is negative, the binary
+    //           string is inverted to get the correct ordering.
+    // Example: 0.00123
+    //   Sign is 2 (bigger than 0)
+    //   Factor is -2 (move decimal point 2 positions right)
+    //   Digits are: 123
+
+    // get the sign of the big decimal
+    int sign = dec.compareTo(HiveDecimal.ZERO);
+
+    // we'll encode the absolute value (sign is separate)
+    dec = dec.abs();
+
+    // get the scale factor to turn big decimal into a decimal < 1
+    int factor = dec.precision() - dec.scale();
+    factor = sign == 1 ? factor : -factor;
+
+    // convert the absolute big decimal to string
+    dec.scaleByPowerOfTen(Math.abs(dec.scale()));
+    String digits = dec.unscaledValue().toString();
+
+    // finally write out the pieces (sign, factor, digits)
+    BinarySortableSerDe.writeByte(output, (byte) ( sign + 1), invert);
+    BinarySortableSerDe.writeByte(output, (byte) ((factor >> 24) ^ 0x80), invert);
+    BinarySortableSerDe.writeByte(output, (byte) ( factor >> 16), invert);
+    BinarySortableSerDe.writeByte(output, (byte) ( factor >> 8), invert);
+    BinarySortableSerDe.writeByte(output, (byte) factor, invert);
+    BinarySortableSerDe.serializeBytes(output, digits.getBytes(BinarySortableSerDe.decimalCharSet),
+        digits.length(), sign == -1 ? !invert : invert);
+  }
+}
\ No newline at end of file
diff --git serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
new file mode 100644
index 0000000..b187aff
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
@@ -0,0 +1,387 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.fast; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + +/* + * Directly deserialize with the caller reading field-by-field a serialization format. + * + * The caller is responsible for calling the read method for the right type of each field + * (after calling readCheckNull). + * + * Reading some fields require a results object to receive value information. A separate + * results object is created by the caller at initialization per different field even for the same + * type. + * + * Some type values are by reference to either bytes in the deserialization buffer or to + * other type specific buffers. So, those references are only valid until the next time set is + * called. + */ +public interface DeserializeRead { + + /* + * The primitive type information for all fields. + */ + PrimitiveTypeInfo[] primitiveTypeInfos(); + + /* + * Set the range of bytes to be deserialized. + */ + void set(byte[] bytes, int offset, int length); + + /* + * Reads the NULL information for a field. + * + * @return Return true when the field is NULL; reading is positioned to the next field. + * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. + */ + boolean readCheckNull() throws IOException; + + /* + * Call this method after all fields have been read to check for extra fields. + */ + void extraFieldsCheck(); + + /* + * Read integrity warning flags. + */ + boolean readBeyondConfiguredFieldsWarned(); + boolean readBeyondBufferRangeWarned(); + boolean bufferRangeHasExtraDataWarned(); + + /* + * BOOLEAN. + */ + boolean readBoolean() throws IOException; + + /* + * BYTE. + */ + byte readByte() throws IOException; + + /* + * SHORT. + */ + short readShort() throws IOException; + + /* + * INT. + */ + int readInt() throws IOException; + + /* + * LONG. 
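+   *
+   * Like the other scalar readers here, readLong() is only valid after readCheckNull() has
+   * returned false for a field whose primitive category is LONG.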
+ */ + long readLong() throws IOException; + + /* + * FLOAT. + */ + float readFloat() throws IOException; + + /* + * DOUBLE. + */ + double readDouble() throws IOException; + + /* + * This class is the base abstract read bytes results for STRING, CHAR, VARCHAR, and BINARY. + */ + public abstract class ReadBytesResults { + + public byte[] bytes; + public int start; + public int length; + + public ReadBytesResults() { + bytes = null; + start = 0; + length = 0; + } + } + + /* + * STRING. + * + * Can be used to read CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadStringResults extends ReadBytesResults { + + public ReadStringResults() { + super(); + } + } + + // Reading a STRING field require a results object to receive value information. A separate + // results object is created at initialization per different bytes field. + ReadStringResults createReadStringResults(); + + void readString(ReadStringResults readStringResults) throws IOException; + + /* + * CHAR. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadHiveCharResults extends ReadBytesResults { + + private CharTypeInfo charTypeInfo; + private int maxLength; + + protected HiveCharWritable hiveCharWritable; + + public ReadHiveCharResults() { + super(); + } + + public void init(CharTypeInfo charTypeInfo) { + this.charTypeInfo = charTypeInfo; + this.maxLength = charTypeInfo.getLength(); + hiveCharWritable = new HiveCharWritable(); + } + + public boolean isInit() { + return (charTypeInfo != null); + } + + public int getMaxLength() { + return maxLength; + } + + public HiveChar getHiveChar() { + return hiveCharWritable.getHiveChar(); + } + } + + // Reading a CHAR field require a results object to receive value information. A separate + // results object is created at initialization per different CHAR field. + ReadHiveCharResults createReadHiveCharResults(); + + void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException; + + /* + * VARCHAR. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadHiveVarcharResults extends ReadBytesResults { + + private VarcharTypeInfo varcharTypeInfo; + private int maxLength; + + protected HiveVarcharWritable hiveVarcharWritable; + + public ReadHiveVarcharResults() { + super(); + } + + public void init(VarcharTypeInfo varcharTypeInfo) { + this.varcharTypeInfo = varcharTypeInfo; + this.maxLength = varcharTypeInfo.getLength(); + hiveVarcharWritable = new HiveVarcharWritable(); + } + + public boolean isInit() { + return (varcharTypeInfo != null); + } + + public int getMaxLength() { + return maxLength; + } + + public HiveVarchar getHiveVarchar() { + return hiveVarcharWritable.getHiveVarchar(); + } + } + + // Reading a VARCHAR field require a results object to receive value information. A separate + // results object is created at initialization per different VARCHAR field. + ReadHiveVarcharResults createReadHiveVarcharResults(); + + void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException; + + /* + * BINARY. + */ + + // This class is for abstract since each format may need its own specialization. 
+ public abstract class ReadBinaryResults extends ReadBytesResults { + + public ReadBinaryResults() { + super(); + } + } + + // Reading a BINARY field require a results object to receive value information. A separate + // results object is created at initialization per different bytes field. + ReadBinaryResults createReadBinaryResults(); + + void readBinary(ReadBinaryResults readBinaryResults) throws IOException; + + /* + * DATE. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadDateResults { + + protected DateWritable dateWritable; + + public ReadDateResults() { + dateWritable = new DateWritable(); + } + + public Date getDate() { + return dateWritable.get(); + } + + public int getDays() { + return dateWritable.getDays(); + } + } + + // Reading a DATE field require a results object to receive value information. A separate + // results object is created at initialization per different DATE field. + ReadDateResults createReadDateResults(); + + void readDate(ReadDateResults readDateResults) throws IOException; + + /* + * TIMESTAMP. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadTimestampResults { + + protected TimestampWritable timestampWritable; + + public ReadTimestampResults() { + timestampWritable = new TimestampWritable(); + } + + public Timestamp getTimestamp() { + return timestampWritable.getTimestamp(); + } + } + + // Reading a TIMESTAMP field require a results object to receive value information. A separate + // results object is created at initialization per different TIMESTAMP field. + ReadTimestampResults createReadTimestampResults(); + + void readTimestamp(ReadTimestampResults readTimestampResult) throws IOException; + + /* + * INTERVAL_YEAR_MONTH. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadIntervalYearMonthResults { + + protected HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; + + public ReadIntervalYearMonthResults() { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + + public HiveIntervalYearMonth getHiveIntervalYearMonth() { + return hiveIntervalYearMonthWritable.getHiveIntervalYearMonth(); + } + } + + // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. + // A separate results object is created at initialization per different INTERVAL_YEAR_MONTH field. + ReadIntervalYearMonthResults createReadIntervalYearMonthResults(); + + void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResult) throws IOException; + + /* + * INTERVAL_DAY_TIME. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadIntervalDayTimeResults { + + protected HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; + + public ReadIntervalDayTimeResults() { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + + public HiveIntervalDayTime getHiveIntervalDayTime() { + return hiveIntervalDayTimeWritable.getHiveIntervalDayTime(); + } + } + + // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. + // A separate results object is created at initialization per different INTERVAL_DAY_TIME field. + ReadIntervalDayTimeResults createReadIntervalDayTimeResults(); + + void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResult) throws IOException; + + /* + * DECIMAL. 
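+   *
+   * The results object carries the field's DecimalTypeInfo (see init() and isInit() below). In
+   * the read implementations here, precision and scale are enforced during readCheckNull(),
+   * which may turn the field into a NULL.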
+ */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadDecimalResults { + + protected DecimalTypeInfo decimalTypeInfo; + + public ReadDecimalResults() { + } + + public void init(DecimalTypeInfo decimalTypeInfo) { + this.decimalTypeInfo = decimalTypeInfo; + } + + public boolean isInit() { + return (decimalTypeInfo != null); + } + + public abstract HiveDecimal getHiveDecimal(); + } + + // Reading a DECIMAL field require a results object to receive value information. A separate + // results object is created at initialization per different DECIMAL field. + ReadDecimalResults createReadDecimalResults(); + + void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException; +} \ No newline at end of file diff --git serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java new file mode 100644 index 0000000..8e586fb --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.fast; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.ByteStream.Output; + +/* + * Directly serialize with the caller writing field-by-field a serialization format. + * + * The caller is responsible for calling the write method for the right type of each field + * (or calling writeNull if the field is a NULL). + * + */ +public interface SerializeWrite { + + /* + * Set the buffer that will receive the serialized data. + */ + void set(Output output); + + /* + * Reset the previously supplied buffer that will receive the serialized data. + */ + void reset(); + + /* + * Write a NULL field. + */ + void writeNull() throws IOException; + + /* + * BOOLEAN. + */ + void writeBoolean(boolean v) throws IOException; + + /* + * BYTE. + */ + void writeByte(byte v) throws IOException; + + /* + * SHORT. + */ + void writeShort(short v) throws IOException; + + /* + * INT. + */ + void writeInt(int v) throws IOException; + + /* + * LONG. + */ + void writeLong(long v) throws IOException; + + /* + * FLOAT. + */ + void writeFloat(float vf) throws IOException; + + /* + * DOUBLE. + */ + void writeDouble(double vd) throws IOException; + + /* + * STRING. 
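+   *
+   * A string value is passed as raw bytes; two overloads accept either a whole array or an
+   * (array, start, length) range.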
+ * + * Can be used to write CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + void writeString(byte[] v) throws IOException; + void writeString(byte[] v, int start, int length) throws IOException; + + /* + * CHAR. + */ + void writeHiveChar(HiveChar hiveChar) throws IOException; + + /* + * VARCHAR. + */ + void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException; + + /* + * BINARY. + */ + void writeBinary(byte[] v) throws IOException; + void writeBinary(byte[] v, int start, int length) throws IOException; + + /* + * DATE. + */ + void writeDate(Date date) throws IOException; + + // We provide a faster way to write a date without a Date object. + void writeDate(int dateAsDays) throws IOException; + + /* + * TIMESTAMP. + */ + void writeTimestamp(Timestamp vt) throws IOException; + + /* + * INTERVAL_YEAR_MONTH. + */ + void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException; + + // We provide a faster way to write a hive interval year month without a HiveIntervalYearMonth object. + void writeHiveIntervalYearMonth(int totalMonths) throws IOException; + + /* + * INTERVAL_DAY_TIME. + */ + void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException; + + // We provide a faster way to write a hive interval day time without a HiveIntervalDayTime object. + void writeHiveIntervalDayTime(long totalNanos) throws IOException; + + /* + * DECIMAL. + */ + void writeHiveDecimal(HiveDecimal dec) throws IOException; +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java index 275b064..884c3ae 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java @@ -54,7 +54,7 @@ public void init(ByteArrayRef bytes, int start, int length) { } // todo this should be configured in serde - private byte[] decodeIfNeeded(byte[] recv) { + public static byte[] decodeIfNeeded(byte[] recv) { boolean arrayByteBase64 = Base64.isArrayByteBase64(recv); if (DEBUG_LOG_ENABLED && arrayByteBase64) { LOG.debug("Data only contains Base64 alphabets only so try to decode the data."); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index ea344b3..5c58f6b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -161,7 +161,7 @@ public static void writeEscaped(OutputStream out, byte[] bytes, int start, } } } else { - out.write(bytes, 0, len); + out.write(bytes, start, len); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java new file mode 100644 index 0000000..8c5b0b3 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java @@ -0,0 +1,1062 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazy.fast; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.nio.charset.CharacterCodingException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalDayTimeResults; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalYearMonthResults; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyBinary; +import org.apache.hadoop.hive.serde2.lazy.LazyByte; +import org.apache.hadoop.hive.serde2.lazy.LazyInteger; +import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyShort; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.TimestampParser; + +/* + * Directly deserialize with the caller reading field-by-field the LazySimple (text) + * serialization format. + * + * The caller is responsible for calling the read method for the right type of each field + * (after calling readCheckNull). + * + * Reading some fields require a results object to receive value information. A separate + * results object is created by the caller at initialization per different field even for the same + * type. + * + * Some type values are by reference to either bytes in the deserialization buffer or to + * other type specific buffers. So, those references are only valid until the next time set is + * called. 
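+ *
+ * A minimal (hypothetical) usage sketch; "typeInfos", "separatorByte", "serdeParams" and
+ * "rowBytes" are placeholder names for caller-provided values, and field 0 is assumed to be a
+ * BIGINT column:
+ *
+ *   LazySimpleDeserializeRead deserializeRead =
+ *       new LazySimpleDeserializeRead(typeInfos, separatorByte, serdeParams);
+ *   deserializeRead.set(rowBytes, 0, rowBytes.length);
+ *   if (!deserializeRead.readCheckNull()) {
+ *     long value = deserializeRead.readLong();
+ *   }
+ *   deserializeRead.extraFieldsCheck();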
+ */ +public class LazySimpleDeserializeRead implements DeserializeRead { + public static final Log LOG = LogFactory.getLog(LazySimpleDeserializeRead.class.getName()); + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private LazySerDeParameters lazyParams; + + private byte separator; + private boolean lastColumnTakesRest; + private boolean isEscaped; + private byte escapeChar; + private byte[] nullSequenceBytes; + private boolean isExtendedBooleanLiteral; + + private byte[] bytes; + private int start; + private int offset; + private int end; + private int fieldCount; + private int fieldIndex; + private int fieldStart; + private int fieldLength; + + private boolean saveBool; + private byte saveByte; + private short saveShort; + private int saveInt; + private long saveLong; + private float saveFloat; + private double saveDouble; + private byte[] saveBytes; + private int saveBytesStart; + private int saveBytesLength; + private Date saveDate; + private Timestamp saveTimestamp; + private HiveIntervalYearMonth saveIntervalYearMonth; + private HiveIntervalDayTime saveIntervalDayTime; + private HiveDecimal saveDecimal; + private DecimalTypeInfo saveDecimalTypeInfo; + + private Text tempText; + private TimestampParser timestampParser; + + private boolean readBeyondConfiguredFieldsWarned; + private boolean readBeyondBufferRangeWarned; + private boolean bufferRangeHasExtraDataWarned; + + public LazySimpleDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos, + byte separator, LazySerDeParameters lazyParams) { + + this.primitiveTypeInfos = primitiveTypeInfos; + + this.separator = separator; + this.lazyParams = lazyParams; + + lastColumnTakesRest = lazyParams.isLastColumnTakesRest(); + isEscaped = lazyParams.isEscaped(); + escapeChar = lazyParams.getEscapeChar(); + nullSequenceBytes = lazyParams.getNullSequence().getBytes(); + isExtendedBooleanLiteral = lazyParams.isExtendedBooleanLiteral(); + + fieldCount = primitiveTypeInfos.length; + tempText = new Text(); + readBeyondConfiguredFieldsWarned = false; + readBeyondBufferRangeWarned = false; + bufferRangeHasExtraDataWarned = false; + } + + // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. + private LazySimpleDeserializeRead() { + } + + /* + * The primitive type information for all fields. + */ + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + /* + * Set the range of bytes to be deserialized. + */ + @Override + public void set(byte[] bytes, int offset, int length) { + this.bytes = bytes; + this.offset = offset; + start = offset; + end = offset + length; + fieldIndex = -1; + } + + /* + * Reads the NULL information for a field. + * + * @return Returns true when the field is NULL; reading is positioned to the next field. + * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. + */ + @Override + public boolean readCheckNull() { + if (++fieldIndex >= fieldCount) { + // Reading beyond the specified field count produces NULL. + if (!readBeyondConfiguredFieldsWarned) { + // Warn only once. + LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " + + " reading more (NULLs returned). Ignoring similar problems."); + readBeyondConfiguredFieldsWarned = true; + } + return true; + } + if (offset > end) { + // We must allow for an empty field at the end, so no strict >= checking. + if (!readBeyondBufferRangeWarned) { + // Warn only once. + int length = end - start; + LOG.info("Reading beyond buffer range! 
Buffer range " + start + + " for length " + length + " but reading more (NULLs returned)." + + " Ignoring similar problems."); + readBeyondBufferRangeWarned = true; + } + + // char[] charsBuffer = new char[end - start]; + // for (int c = 0; c < charsBuffer.length; c++) { + // charsBuffer[c] = (char) (bytes[start + c] & 0xFF); + // } + + return true; + } + + fieldStart = offset; + while (true) { + if (offset >= end) { + fieldLength = offset - fieldStart; + break; + } + if (bytes[offset] == separator) { + fieldLength = (offset++ - fieldStart); + break; + } + if (isEscaped && bytes[offset] == escapeChar + && offset + 1 < end) { + // Ignore the char after escape char. + offset += 2; + } else { + offset++; + } + } + + char[] charField = new char[fieldLength]; + for (int c = 0; c < charField.length; c++) { + charField[c] = (char) (bytes[fieldStart + c] & 0xFF); + } + + // Is the field the configured string representing NULL? + if (nullSequenceBytes != null) { + if (fieldLength == nullSequenceBytes.length) { + int i = 0; + while (true) { + if (bytes[fieldStart + i] != nullSequenceBytes[i]) { + break; + } + i++; + if (i >= fieldLength) { + return true; + } + } + } + } + + switch (primitiveTypeInfos[fieldIndex].getPrimitiveCategory()) { + case BOOLEAN: + { + int i = fieldStart; + if (fieldLength == 4) { + if ((bytes[i] == 'T' || bytes[i] == 't') && + (bytes[i + 1] == 'R' || bytes[i + 1] == 'r') && + (bytes[i + 2] == 'U' || bytes[i + 1] == 'u') && + (bytes[i + 3] == 'E' || bytes[i + 3] == 'e')) { + saveBool = true; + } else { + // No boolean value match for 5 char field. + return true; + } + } else if (fieldLength == 5) { + if ((bytes[i] == 'F' || bytes[i] == 'f') && + (bytes[i + 1] == 'A' || bytes[i + 1] == 'a') && + (bytes[i + 2] == 'L' || bytes[i + 2] == 'l') && + (bytes[i + 3] == 'S' || bytes[i + 3] == 's') && + (bytes[i + 4] == 'E' || bytes[i + 4] == 'e')) { + saveBool = false; + } else { + // No boolean value match for 4 char field. + return true; + } + } else if (isExtendedBooleanLiteral && fieldLength == 1) { + byte b = bytes[fieldStart]; + if (b == '1' || b == 't' || b == 'T') { + saveBool = true; + } else if (b == '0' || b == 'f' || b == 'F') { + saveBool = false; + } else { + // No boolean value match for extended 1 char field. + return true; + } + } else { + // No boolean value match for other lengths. 
+ return true; + } + } + break; + case BYTE: + try { + saveByte = LazyByte.parseByte(bytes, fieldStart, fieldLength, 10); + } catch (NumberFormatException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "TINYINT"); + return true; + } +// if (!parseLongFast()) { +// return true; +// } +// saveShort = (short) saveLong; +// if (saveShort != saveLong) { +// return true; +// } + break; + case SHORT: + try { + saveShort = LazyShort.parseShort(bytes, fieldStart, fieldLength, 10); + } catch (NumberFormatException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "SMALLINT"); + return true; + } +// if (!parseLongFast()) { +// return true; +// } +// saveShort = (short) saveLong; +// if (saveShort != saveLong) { +// return true; +// } + break; + case INT: + try { + saveInt = LazyInteger.parseInt(bytes, fieldStart, fieldLength, 10); + } catch (NumberFormatException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "INT"); + return true; + } +// if (!parseLongFast()) { +// return true; +// } +// saveInt = (int) saveLong; +// if (saveInt != saveLong) { +// return true; +// } + break; + case LONG: + try { + saveLong = LazyLong.parseLong(bytes, fieldStart, fieldLength, 10); + } catch (NumberFormatException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "BIGINT"); + return true; + } +// if (!parseLongFast()) { +// return true; +// } + break; + case FLOAT: + { + String byteData = null; + try { + byteData = Text.decode(bytes, fieldStart, fieldLength); + saveFloat = Float.parseFloat(byteData); + } catch (NumberFormatException e) { + LOG.debug("Data not in the Float data type range so converted to null. Given data is :" + + byteData, e); + return true; + } catch (CharacterCodingException e) { + LOG.debug("Data not in the Float data type range so converted to null.", e); + return true; + } + } +// if (!parseFloat()) { +// return true; +// } + break; + case DOUBLE: + { + String byteData = null; + try { + byteData = Text.decode(bytes, fieldStart, fieldLength); + saveDouble = Double.parseDouble(byteData); + } catch (NumberFormatException e) { + LOG.debug("Data not in the Double data type range so converted to null. Given data is :" + + byteData, e); + return true; + } catch (CharacterCodingException e) { + LOG.debug("Data not in the Double data type range so converted to null.", e); + return true; + } + } +// if (!parseDouble()) { +// return true; +// } + break; + + case STRING: + case CHAR: + case VARCHAR: + if (isEscaped) { + LazyUtils.copyAndEscapeStringDataToText(bytes, fieldStart, fieldLength, escapeChar, tempText); + saveBytes = tempText.getBytes(); + saveBytesStart = 0; + saveBytesLength = tempText.getLength(); + } else { + // if the data is not escaped, simply copy the data. + saveBytes = bytes; + saveBytesStart = fieldStart; + saveBytesLength = fieldLength; + } + break; + case BINARY: + { + byte[] recv = new byte[fieldLength]; + System.arraycopy(bytes, fieldStart, recv, 0, fieldLength); + byte[] decoded = LazyBinary.decodeIfNeeded(recv); + // use the original bytes in case decoding should fail + decoded = decoded.length > 0 ? 
decoded : recv; + saveBytes = decoded; + saveBytesStart = 0; + saveBytesLength = decoded.length; + } + break; + case DATE: + { + String s = null; + try { + s = Text.decode(bytes, fieldStart, fieldLength); + saveDate = Date.valueOf(s); + } catch (Exception e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "DATE"); + return true; + } + } +// if (!parseDate()) { +// return true; +// } + break; + case TIMESTAMP: + { + String s = null; + try { + s = new String(bytes, fieldStart, fieldLength, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + LOG.error(e); + s = ""; + } + + if (s.compareTo("NULL") == 0) { + logExceptionMessage(bytes, fieldStart, fieldLength, "TIMESTAMP"); + return true; + } else { + try { + if (timestampParser == null) { + timestampParser = new TimestampParser(); + } + saveTimestamp = timestampParser.parseTimestamp(s); + } catch (IllegalArgumentException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "TIMESTAMP"); + return true; + } + } + } +// if (!parseTimestamp()) { +// return true; +// } + break; + case INTERVAL_YEAR_MONTH: + { + String s = null; + try { + s = Text.decode(bytes, fieldStart, fieldLength); + saveIntervalYearMonth = HiveIntervalYearMonth.valueOf(s); + } catch (Exception e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "INTERVAL_YEAR_MONTH"); + return true; + } + } +// if (!parseIntervalYearMonth()) { +// return true; +// } + break; + case INTERVAL_DAY_TIME: + { + String s = null; + try { + s = Text.decode(bytes, fieldStart, fieldLength); + saveIntervalDayTime = HiveIntervalDayTime.valueOf(s); + } catch (Exception e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "INTERVAL_DAY_TIME"); + return true; + } + } +// if (!parseIntervalDayTime()) { +// return true; +// } + break; + case DECIMAL: + { + String byteData = null; + try { + byteData = Text.decode(bytes, fieldStart, fieldLength); + } catch (CharacterCodingException e) { + LOG.debug("Data not in the HiveDecimal data type range so converted to null.", e); + return true; + } + + saveDecimal = HiveDecimal.create(byteData); + saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex]; + int precision = saveDecimalTypeInfo.getPrecision(); + int scale = saveDecimalTypeInfo.getScale(); + saveDecimal = HiveDecimalUtils.enforcePrecisionScale(saveDecimal, precision, scale); + if (saveDecimal == null) { + LOG.debug("Data not in the HiveDecimal data type range so converted to null. Given data is :" + + byteData); + return true; + } + } +// if (!parseDecimal()) { +// return true; +// } + break; + + default: + throw new Error("Unexpected primitive category " + primitiveTypeInfos[fieldIndex].getPrimitiveCategory()); + } + + return false; + } + + public void logExceptionMessage(byte[] bytes, int bytesStart, int bytesLength, String dataType) { + try { + if(LOG.isDebugEnabled()) { + String byteData = Text.decode(bytes, bytesStart, bytesLength); + LOG.debug("Data not in the " + dataType + + " data type range so converted to null. Given data is :" + + byteData, new Exception("For debugging purposes")); + } + } catch (CharacterCodingException e1) { + LOG.debug("Data not in the " + dataType + " data type range so converted to null.", e1); + } + } + + /* + * Call this method after all fields have been read to check for extra fields. + */ + public void extraFieldsCheck() { + if (offset < end) { + // We did not consume all of the byte range. + if (!bufferRangeHasExtraDataWarned) { + // Warn only once. 
+ int length = end - start; + LOG.info("Not all fields were read in the buffer range! Buffer range " + start + + " for length " + length + " but reading more (NULLs returned)." + + " Ignoring similar problems."); + bufferRangeHasExtraDataWarned = true; + } + } + } + + /* + * Read integrity warning flags. + */ + @Override + public boolean readBeyondConfiguredFieldsWarned() { + return readBeyondConfiguredFieldsWarned; + } + @Override + public boolean readBeyondBufferRangeWarned() { + return readBeyondBufferRangeWarned; + } + @Override + public boolean bufferRangeHasExtraDataWarned() { + return bufferRangeHasExtraDataWarned; + } + + /* + * BOOLEAN. + */ + @Override + public boolean readBoolean() { + return saveBool; + } + + /* + * BYTE. + */ + @Override + public byte readByte() { + return saveByte; + } + + /* + * SHORT. + */ + @Override + public short readShort() { + return saveShort; + } + + /* + * INT. + */ + @Override + public int readInt() { + return saveInt; + } + + /* + * LONG. + */ + @Override + public long readLong() { + return saveLong; + } + + /* + * FLOAT. + */ + @Override + public float readFloat() { + return saveFloat; + } + + /* + * DOUBLE. + */ + @Override + public double readDouble() { + return saveDouble; + } + + /* + * STRING. + * + * Can be used to read CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + + // This class is for internal use. + private class LazySimpleReadStringResults extends ReadStringResults { + public LazySimpleReadStringResults() { + super(); + } + } + + // Reading a STRING field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadStringResults createReadStringResults() { + return new LazySimpleReadStringResults(); + } + + @Override + public void readString(ReadStringResults readStringResults) { + readStringResults.bytes = saveBytes; + readStringResults.start = saveBytesStart; + readStringResults.length = saveBytesLength; + } + + /* + * CHAR. + */ + + // This class is for internal use. + private static class LazySimpleReadHiveCharResults extends ReadHiveCharResults { + + // Use our STRING reader. + public LazySimpleReadStringResults readStringResults; + + public LazySimpleReadHiveCharResults() { + super(); + } + + public HiveCharWritable getHiveCharWritable() { + return hiveCharWritable; + } + } + + // Reading a CHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different CHAR field. + @Override + public ReadHiveCharResults createReadHiveCharResults() { + return new LazySimpleReadHiveCharResults(); + } + + public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { + LazySimpleReadHiveCharResults LazySimpleReadHiveCharResults = (LazySimpleReadHiveCharResults) readHiveCharResults; + + if (!LazySimpleReadHiveCharResults.isInit()) { + LazySimpleReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + if (LazySimpleReadHiveCharResults.readStringResults == null) { + LazySimpleReadHiveCharResults.readStringResults = new LazySimpleReadStringResults(); + } + LazySimpleReadStringResults readStringResults = LazySimpleReadHiveCharResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + // Copy the bytes into our Text object, then truncate. 
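+    // Illustrative note (behavior sketched from enforceMaxLength below, exact padding point is
+    // an assumption): for a CHAR(4) column, a field value of "abcdef" would be cut back to
+    // "abcd" here, while shorter values are space-padded by HiveCharWritable when the padded
+    // form is requested.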
+ HiveCharWritable hiveCharWritable = LazySimpleReadHiveCharResults.getHiveCharWritable(); + hiveCharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); + hiveCharWritable.enforceMaxLength(LazySimpleReadHiveCharResults.getMaxLength()); + + readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); + readHiveCharResults.start = 0; + readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); + } + + /* + * VARCHAR. + */ + + // This class is for internal use. + private static class LazySimpleReadHiveVarcharResults extends ReadHiveVarcharResults { + + // Use our bytes reader. + public LazySimpleReadStringResults readStringResults; + + public LazySimpleReadHiveVarcharResults() { + super(); + } + + public HiveVarcharWritable getHiveVarcharWritable() { + return hiveVarcharWritable; + } + } + + // Reading a VARCHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different VARCHAR field. + @Override + public ReadHiveVarcharResults createReadHiveVarcharResults() { + return new LazySimpleReadHiveVarcharResults(); + } + + public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { + LazySimpleReadHiveVarcharResults lazySimpleReadHiveVarvarcharResults = (LazySimpleReadHiveVarcharResults) readHiveVarcharResults; + + if (!lazySimpleReadHiveVarvarcharResults.isInit()) { + lazySimpleReadHiveVarvarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + if (lazySimpleReadHiveVarvarcharResults.readStringResults == null) { + lazySimpleReadHiveVarvarcharResults.readStringResults = new LazySimpleReadStringResults(); + } + LazySimpleReadStringResults readStringResults = lazySimpleReadHiveVarvarcharResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + // Copy the bytes into our Text object, then truncate. + HiveVarcharWritable hiveVarcharWritable = lazySimpleReadHiveVarvarcharResults.getHiveVarcharWritable(); + hiveVarcharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); + hiveVarcharWritable.enforceMaxLength(lazySimpleReadHiveVarvarcharResults.getMaxLength()); + + readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); + readHiveVarcharResults.start = 0; + readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); + } + + /* + * BINARY. + */ + + // This class is for internal use. + private class LazySimpleReadBinaryResults extends ReadBinaryResults { + public LazySimpleReadBinaryResults() { + super(); + } + } + + // Reading a BINARY field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadBinaryResults createReadBinaryResults() { + return new LazySimpleReadBinaryResults(); + } + + @Override + public void readBinary(ReadBinaryResults readBinaryResults) { + readBinaryResults.bytes = saveBytes; + readBinaryResults.start = saveBytesStart; + readBinaryResults.length = saveBytesLength; + } + + /* + * DATE. + */ + + // This class is for internal use. 
+ private static class LazySimpleReadDateResults extends ReadDateResults { + + public LazySimpleReadDateResults() { + super(); + } + + public DateWritable getDateWritable() { + return dateWritable; + } + } + + // Reading a DATE field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DATE field. + @Override + public ReadDateResults createReadDateResults() { + return new LazySimpleReadDateResults(); + } + + @Override + public void readDate(ReadDateResults readDateResults) { + LazySimpleReadDateResults lazySimpleReadDateResults = (LazySimpleReadDateResults) readDateResults; + + DateWritable dateWritable = lazySimpleReadDateResults.getDateWritable(); + dateWritable.set(saveDate); + saveDate = null; + } + + + /* + * INTERVAL_YEAR_MONTH. + */ + + // This class is for internal use. + private static class LazySimpleReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { + + public LazySimpleReadIntervalYearMonthResults() { + super(); + } + + public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { + return hiveIntervalYearMonthWritable; + } + } + + // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_YEAR_MONTH field. + @Override + public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { + return new LazySimpleReadIntervalYearMonthResults(); + } + + @Override + public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) + throws IOException { + LazySimpleReadIntervalYearMonthResults lazySimpleReadIntervalYearMonthResults = + (LazySimpleReadIntervalYearMonthResults) readIntervalYearMonthResults; + + HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = + lazySimpleReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); + hiveIntervalYearMonthWritable.set(saveIntervalYearMonth); + saveIntervalYearMonth = null; + } + + /* + * INTERVAL_DAY_TIME. + */ + + // This class is for internal use. + private static class LazySimpleReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { + + public LazySimpleReadIntervalDayTimeResults() { + super(); + } + + public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { + return hiveIntervalDayTimeWritable; + } + } + + // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_DAY_TIME field. + @Override + public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { + return new LazySimpleReadIntervalDayTimeResults(); + } + + @Override + public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) + throws IOException { + LazySimpleReadIntervalDayTimeResults lazySimpleReadIntervalDayTimeResults = + (LazySimpleReadIntervalDayTimeResults) readIntervalDayTimeResults; + + HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = + lazySimpleReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); + hiveIntervalDayTimeWritable.set(saveIntervalDayTime); + saveIntervalDayTime = null; + } + + /* + * TIMESTAMP. + */ + + // This class is for internal use. 
+ private static class LazySimpleReadTimestampResults extends ReadTimestampResults { + + public LazySimpleReadTimestampResults() { + super(); + } + + public TimestampWritable getTimestampWritable() { + return timestampWritable; + } + } + + // Reading a TIMESTAMP field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different TIMESTAMP field. + @Override + public ReadTimestampResults createReadTimestampResults() { + return new LazySimpleReadTimestampResults(); + } + + @Override + public void readTimestamp(ReadTimestampResults readTimestampResults) { + LazySimpleReadTimestampResults lazySimpleReadTimestampResults = + (LazySimpleReadTimestampResults) readTimestampResults; + + TimestampWritable timestampWritable = lazySimpleReadTimestampResults.getTimestampWritable(); + timestampWritable.set(saveTimestamp); + saveTimestamp = null; + } + + /* + * DECIMAL. + */ + + // This class is for internal use. + private static class LazySimpleReadDecimalResults extends ReadDecimalResults { + + HiveDecimal hiveDecimal; + + public LazySimpleReadDecimalResults() { + super(); + } + + @Override + public HiveDecimal getHiveDecimal() { + return hiveDecimal; + } + } + + // Reading a DECIMAL field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DECIMAL field. + @Override + public ReadDecimalResults createReadDecimalResults() { + return new LazySimpleReadDecimalResults(); + } + + @Override + public void readHiveDecimal(ReadDecimalResults readDecimalResults) { + LazySimpleReadDecimalResults lazySimpleReadDecimalResults = (LazySimpleReadDecimalResults) readDecimalResults; + + if (!lazySimpleReadDecimalResults.isInit()) { + lazySimpleReadDecimalResults.init(saveDecimalTypeInfo); + } + + lazySimpleReadDecimalResults.hiveDecimal = saveDecimal; + + saveDecimal = null; + saveDecimalTypeInfo = null; + } + + private static byte[] maxLongBytes = ((Long) Long.MAX_VALUE).toString().getBytes(); + private static int maxLongDigitsCount = maxLongBytes.length; + private static byte[] minLongNoSignBytes = ((Long) Long.MIN_VALUE).toString().substring(1).getBytes(); + + private boolean parseLongFast() { + + // Parse without using exceptions for better performance. + int i = fieldStart; + int end = fieldStart + fieldLength; + boolean negative = false; + if (i >= end) { + return false; // Empty field. + } + if (bytes[i] == '+') { + i++; + if (i >= end) { + return false; + } + } else if (bytes[i] == '-') { + negative = true; + i++; + if (i >= end) { + return false; + } + } + // Skip leading zeros. + boolean atLeastOneZero = false; + while (true) { + if (bytes[i] != '0') { + break; + } + i++; + if (i >= end) { + saveLong = 0; + return true; + } + atLeastOneZero = true; + } + // We tolerate and ignore decimal places. + if (bytes[i] == '.') { + if (!atLeastOneZero) { + return false; + } + saveLong = 0; + // Fall through below and verify trailing decimal digits. + } else { + if (!Character.isDigit(bytes[i])) { + return false; + } + int nonLeadingZeroStart = i; + int digitCount = 1; + saveLong = Character.digit(bytes[i], 10); + i++; + while (i < end) { + if (!Character.isDigit(bytes[i])) { + break; + } + digitCount++; + if (digitCount > maxLongDigitsCount) { + return false; + } else if (digitCount == maxLongDigitsCount) { + // Use the old trick of comparing against number string to check for overflow. 
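+          // The candidate digits (sign and leading zeros already stripped) are compared byte by
+          // byte against the digits of Long.MAX_VALUE ("9223372036854775807"), or against
+          // Long.MIN_VALUE without its sign ("9223372036854775808") for negative input.  For
+          // example, a 19-digit run starting "9223372036854775808" compares greater on its last
+          // byte ('8' > '7'), so it is rejected as overflow.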
+          if (!negative) {
+            if (byteArrayCompareRanges(bytes, nonLeadingZeroStart, maxLongBytes, 0, digitCount) >= 1) {
+              return false;
+            }
+          } else {
+            if (byteArrayCompareRanges(bytes, nonLeadingZeroStart, minLongNoSignBytes, 0, digitCount) >= 1) {
+              return false;
+            }
+          }
+        }
+        saveLong = (saveLong * 10) + Character.digit(bytes[i], 10);
+        // Advance to the next byte so the loop terminates.
+        i++;
+      }
+      if (negative) {
+        // Safe because of our number string comparison against min (negative) long.
+        saveLong = -saveLong;
+      }
+      if (i >= end) {
+        return true;
+      }
+      if (bytes[i] != '.') {
+        return false;
+      }
+    }
+    // Fall through to here if we detect the start of trailing decimal digits...
+    // We verify trailing digits only.
+    while (true) {
+      i++;
+      if (i >= end) {
+        break;
+      }
+      if (!Character.isDigit(bytes[i])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  public static int byteArrayCompareRanges(byte[] arg1, int start1, byte[] arg2, int start2, int len) {
+    for (int i = 0; i < len; i++) {
+      // Note the "& 0xff" is just a way to convert unsigned bytes to signed integer.
+      int b1 = arg1[i + start1] & 0xff;
+      int b2 = arg2[i + start2] & 0xff;
+      if (b1 != b2) {
+        return b1 - b2;
+      }
+    }
+    return 0;
+  }
+
+}
\ No newline at end of file
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
new file mode 100644
index 0000000..0771b12
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
@@ -0,0 +1,510 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.serde2.lazy.fast; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyDate; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveDecimal; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalDayTime; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalYearMonth; +import org.apache.hadoop.hive.serde2.lazy.LazyInteger; +import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; + +/* + * Directly serialize, field-by-field, the LazyBinary format. +* + * This is an alternative way to serialize than what is provided by LazyBinarySerDe. + */ +public class LazySimpleSerializeWrite implements SerializeWrite { + public static final Log LOG = LogFactory.getLog(LazySimpleSerializeWrite.class.getName()); + + private LazySerDeParameters lazyParams; + + private byte separator; + private boolean[] needsEscape; + private boolean isEscaped; + private byte escapeChar; + private byte[] nullSequenceBytes; + + private Output output; + + private int fieldCount; + private int index; + + // For thread safety, we allocate private writable objects for our use only. 
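+  // An illustrative calling pattern (sketch only -- assumes a LazySerDeParameters configured
+  // elsewhere, here called "lazyParams", and a tab separator):
+  //
+  //   LazySimpleSerializeWrite writer =
+  //       new LazySimpleSerializeWrite(3, (byte) '\t', lazyParams);
+  //   Output out = new Output();
+  //   writer.set(out);
+  //   writer.writeLong(42L);
+  //   writer.writeString("hello".getBytes());
+  //   writer.writeNull();
+  //   // out now holds the delimited row bytes (see Output.getLength()).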
+ private DateWritable dateWritable; + private TimestampWritable timestampWritable; + private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; + private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; + private HiveIntervalDayTime hiveIntervalDayTime; + + public LazySimpleSerializeWrite(int fieldCount, + byte separator, LazySerDeParameters lazyParams) { + + this(); + this.fieldCount = fieldCount; + + this.separator = separator; + this.lazyParams = lazyParams; + + isEscaped = lazyParams.isEscaped(); + escapeChar = lazyParams.getEscapeChar(); + needsEscape = lazyParams.getNeedsEscape(); + nullSequenceBytes = lazyParams.getNullSequence().getBytes(); + } + + // Not public since we must have the field count and other information. + private LazySimpleSerializeWrite() { + } + + /* + * Set the buffer that will receive the serialized data. + */ + @Override + public void set(Output output) { + this.output = output; + output.reset(); + index = 0; + } + + /* + * Reset the previously supplied buffer that will receive the serialized data. + */ + @Override + public void reset() { + output.reset(); + index = 0; + } + + /* + * General Pattern: + * + * if (index > 0) { + * output.write(separator); + * } + * + * WHEN NOT NULL: Write value. + * OTHERWISE NULL: Write nullSequenceBytes. + * + * Increment index + * + */ + + /* + * Write a NULL field. + */ + @Override + public void writeNull() throws IOException { + + if (index > 0) { + output.write(separator); + } + + output.write(nullSequenceBytes); + + index++; + } + + /* + * BOOLEAN. + */ + @Override + public void writeBoolean(boolean v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (v) { + output.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length); + } else { + output.write(LazyUtils.falseBytes, 0, LazyUtils.falseBytes.length); + } + + index++; + } + + /* + * BYTE. + */ + @Override + public void writeByte(byte v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyInteger.writeUTF8(output, v); + + index++; + } + + /* + * SHORT. + */ + @Override + public void writeShort(short v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyInteger.writeUTF8(output, v); + + index++; + } + + /* + * INT. + */ + @Override + public void writeInt(int v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyInteger.writeUTF8(output, v); + + index++; + } + + /* + * LONG. + */ + @Override + public void writeLong(long v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyLong.writeUTF8(output, v); + + index++; + } + + /* + * FLOAT. + */ + @Override + public void writeFloat(float vf) throws IOException { + + if (index > 0) { + output.write(separator); + } + + ByteBuffer b = Text.encode(String.valueOf(vf)); + output.write(b.array(), 0, b.limit()); + + index++; + } + + /* + * DOUBLE. + */ + @Override + public void writeDouble(double v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + ByteBuffer b = Text.encode(String.valueOf(v)); + output.write(b.array(), 0, b.limit()); + + index++; + } + + /* + * STRING. + * + * Can be used to write CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. 
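+   * For instance, a caller writing a VARCHAR(10) through writeString is expected to have already
+   * applied the maximum length (e.g. via HiveVarcharWritable.enforceMaxLength or an equivalent
+   * check) before passing the bytes in; nothing below re-checks it.  (Illustrative note; the
+   * exact enforcement point is up to the caller.)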
+ */ + @Override + public void writeString(byte[] v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyUtils.writeEscaped(output, v, 0, v.length, isEscaped, escapeChar, + needsEscape); + + index++; + } + + @Override + public void writeString(byte[] v, int start, int length) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyUtils.writeEscaped(output, v, start, length, isEscaped, escapeChar, + needsEscape); + + index++; + } + + /* + * CHAR. + */ + @Override + public void writeHiveChar(HiveChar hiveChar) throws IOException { + + if (index > 0) { + output.write(separator); + } + + ByteBuffer b = Text.encode(hiveChar.getPaddedValue()); + LazyUtils.writeEscaped(output, b.array(), 0, b.limit(), isEscaped, escapeChar, + needsEscape); + + index++; + } + + /* + * VARCHAR. + */ + @Override + public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException { + + if (index > 0) { + output.write(separator); + } + + ByteBuffer b = Text.encode(hiveVarchar.getValue()); + LazyUtils.writeEscaped(output, b.array(), 0, b.limit(), isEscaped, escapeChar, + needsEscape); + + index++; + } + + /* + * BINARY. + */ + @Override + public void writeBinary(byte[] v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + byte[] toEncode = new byte[v.length]; + System.arraycopy(v, 0, toEncode, 0, v.length); + byte[] toWrite = Base64.encodeBase64(toEncode); + output.write(toWrite, 0, toWrite.length); + + index++; + } + + @Override + public void writeBinary(byte[] v, int start, int length) throws IOException { + + if (index > 0) { + output.write(separator); + } + + byte[] toEncode = new byte[length]; + System.arraycopy(v, start, toEncode, 0, length); + byte[] toWrite = Base64.encodeBase64(toEncode); + output.write(toWrite, 0, toWrite.length); + + index++; + } + + /* + * DATE. + */ + @Override + public void writeDate(Date date) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (dateWritable == null) { + dateWritable = new DateWritable(); + } + dateWritable.set(date); + LazyDate.writeUTF8(output, dateWritable); + + index++; + } + + // We provide a faster way to write a date without a Date object. + @Override + public void writeDate(int dateAsDays) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (dateWritable == null) { + dateWritable = new DateWritable(); + } + dateWritable.set(dateAsDays); + LazyDate.writeUTF8(output, dateWritable); + + index++; + } + + /* + * TIMESTAMP. + */ + @Override + public void writeTimestamp(Timestamp v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (timestampWritable == null) { + timestampWritable = new TimestampWritable(); + } + timestampWritable.set(v); + LazyTimestamp.writeUTF8(output, timestampWritable); + + index++; + } + + /* + * INTERVAL_YEAR_MONTH. 
+ */ + @Override + public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (hiveIntervalYearMonthWritable == null) { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + hiveIntervalYearMonthWritable.set(viyt); + LazyHiveIntervalYearMonth.writeUTF8(output, hiveIntervalYearMonthWritable); + + index++; + } + + + @Override + public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (hiveIntervalYearMonthWritable == null) { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + hiveIntervalYearMonthWritable.set(totalMonths); + LazyHiveIntervalYearMonth.writeUTF8(output, hiveIntervalYearMonthWritable); + + index++; + } + + /* + * INTERVAL_DAY_TIME. + */ + @Override + public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (hiveIntervalDayTimeWritable == null) { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + hiveIntervalDayTimeWritable.set(vidt); + LazyHiveIntervalDayTime.writeUTF8(output, hiveIntervalDayTimeWritable); + + index++; + } + + @Override + public void writeHiveIntervalDayTime(long totalNanos) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (hiveIntervalDayTime == null) { + hiveIntervalDayTime = new HiveIntervalDayTime(); + } + if (hiveIntervalDayTimeWritable == null) { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos); + hiveIntervalDayTimeWritable.set(hiveIntervalDayTime); + LazyHiveIntervalDayTime.writeUTF8(output, hiveIntervalDayTimeWritable); + + index++; + } + + /* + * DECIMAL. + */ + @Override + public void writeHiveDecimal(HiveDecimal v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyHiveDecimal.writeUTF8(output, v); + + index++; + } +} \ No newline at end of file diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java index 8819703..43255cd 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java @@ -166,15 +166,18 @@ private void parse() { // Extra bytes at the end? if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) { extraFieldWarned = true; - LOG.warn("Extra bytes detected at the end of the row! Ignoring similar " - + "problems."); + LOG.warn("Extra bytes detected at the end of the row! " + + "Last field end " + lastFieldByteEnd + " and serialize buffer end " + structByteEnd + ". " + + "Ignoring similar problems."); } // Missing fields? if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) { missingFieldWarned = true; - LOG.info("Missing fields! Expected " + fields.length + " fields but " - + "only got " + fieldId + "! Ignoring similar problems."); + LOG.info("Missing fields! Expected " + fields.length + " fields but " + + "only got " + fieldId + "! " + + "Last field end " + lastFieldByteEnd + " and serialize buffer end " + structByteEnd + ". 
" + + "Ignoring similar problems."); } Arrays.fill(fieldInited, false); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java new file mode 100644 index 0000000..a18e8b8 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java @@ -0,0 +1,942 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazybinary.fast; + +import java.io.EOFException; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + +/* + * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format. + * + * The caller is responsible for calling the read method for the right type of each field + * (after calling readCheckNull). + * + * Reading some fields require a results object to receive value information. A separate + * results object is created by the caller at initialization per different field even for the same + * type. + * + * Some type values are by reference to either bytes in the deserialization buffer or to + * other type specific buffers. So, those references are only valid until the next time set is + * called. 
+ */ +public class LazyBinaryDeserializeRead implements DeserializeRead { + public static final Log LOG = LogFactory.getLog(LazyBinaryDeserializeRead.class.getName()); + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private byte[] bytes; + private int start; + private int offset; + private int end; + private int fieldCount; + private int fieldIndex; + private byte nullByte; + + private DecimalTypeInfo saveDecimalTypeInfo; + private HiveDecimal saveDecimal; + + // Object to receive results of reading a decoded variable length int or long. + private VInt tempVInt; + private VLong tempVLong; + private HiveDecimalWritable tempHiveDecimalWritable; + + private boolean readBeyondConfiguredFieldsWarned; + private boolean readBeyondBufferRangeWarned; + private boolean bufferRangeHasExtraDataWarned; + + public LazyBinaryDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos) { + this.primitiveTypeInfos = primitiveTypeInfos; + fieldCount = primitiveTypeInfos.length; + tempVInt = new VInt(); + tempVLong = new VLong(); + readBeyondConfiguredFieldsWarned = false; + readBeyondBufferRangeWarned = false; + bufferRangeHasExtraDataWarned = false; + } + + // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. + private LazyBinaryDeserializeRead() { + } + + /* + * The primitive type information for all fields. + */ + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + /* + * Set the range of bytes to be deserialized. + */ + @Override + public void set(byte[] bytes, int offset, int length) { + this.bytes = bytes; + this.offset = offset; + start = offset; + end = offset + length; + fieldIndex = 0; + } + + /* + * Reads the NULL information for a field. + * + * @return Returns true when the field is NULL; reading is positioned to the next field. + * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. + */ + @Override + public boolean readCheckNull() throws IOException { + if (fieldIndex >= fieldCount) { + // Reading beyond the specified field count produces NULL. + if (!readBeyondConfiguredFieldsWarned) { + // Warn only once. + LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " + + " reading more (NULLs returned). Ignoring similar problems."); + readBeyondConfiguredFieldsWarned = true; + } + return true; + } + + if (fieldIndex == 0) { + // The rest of the range check for fields after the first is below after checking + // the NULL byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + + // NOTE: The bit is set to 1 if a field is NOT NULL. + if ((nullByte & (1 << (fieldIndex % 8))) != 0) { + + // Make sure there is at least one byte that can be read for a value. + if (offset >= end) { + // Careful: since we may be dealing with NULLs in the final NULL byte, we check after + // the NULL byte check.. + warnBeyondEof(); + } + + // We have a field and are positioned to it. + + if (primitiveTypeInfos[fieldIndex].getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { + return false; + } + + // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read + // it here. + return earlyReadHiveDecimal(); + } + + // When NULL, we need to move past this field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. 
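+        // Layout reminder: each NULL byte covers the next 8 fields, one bit per field, with a set
+        // bit meaning NOT NULL.  For example, if fields 8 and 10 are present but field 9 is NULL,
+        // this byte would read 0b00000101.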
+ if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return true; + } + + /* + * Call this method after all fields have been read to check for extra fields. + */ + public void extraFieldsCheck() { + if (offset < end) { + // We did not consume all of the byte range. + if (!bufferRangeHasExtraDataWarned) { + // Warn only once. + int length = end - start; + int remaining = end - offset; + LOG.info("Not all fields were read in the buffer range! Buffer range " + start + + " for length " + length + " but " + remaining + " bytes remain. " + + "(total buffer length " + bytes.length + ")" + + " Ignoring similar problems."); + bufferRangeHasExtraDataWarned = true; + } + } + } + + /* + * Read integrity warning flags. + */ + @Override + public boolean readBeyondConfiguredFieldsWarned() { + return readBeyondConfiguredFieldsWarned; + } + @Override + public boolean readBeyondBufferRangeWarned() { + return readBeyondBufferRangeWarned; + } + @Override + public boolean bufferRangeHasExtraDataWarned() { + return bufferRangeHasExtraDataWarned; + } + + private void warnBeyondEof() throws EOFException { + if (!readBeyondBufferRangeWarned) { + // Warn only once. + int length = end - start; + LOG.info("Reading beyond buffer range! Buffer range " + start + + " for length " + length + " but reading more... " + + "(total buffer length " + bytes.length + ")" + + " Ignoring similar problems."); + readBeyondBufferRangeWarned = true; + } + } + + /* + * BOOLEAN. + */ + @Override + public boolean readBoolean() throws IOException { + // No check needed for single byte read. + byte result = bytes[offset++]; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return (result != 0); + } + + /* + * BYTE. + */ + @Override + public byte readByte() throws IOException { + // No check needed for single byte read. + byte result = bytes[offset++]; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + if ((fieldIndex % 8) == 0) { + nullByte = bytes[offset++]; + } + } + + return result; + } + + /* + * SHORT. + */ + @Override + public short readShort() throws IOException { + // Last item -- ok to be at end. + if (offset + 2 > end) { + warnBeyondEof(); + } + short result = LazyBinaryUtils.byteArrayToShort(bytes, offset); + offset += 2; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return result; + } + + /* + * INT. + */ + @Override + public int readInt() throws IOException { + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return tempVInt.value; + } + + /* + * LONG. 
+ */ + @Override + public long readLong() throws IOException { + LazyBinaryUtils.readVLong(bytes, offset, tempVLong); + offset += tempVLong.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return tempVLong.value; + } + + /* + * FLOAT. + */ + @Override + public float readFloat() throws IOException { + // Last item -- ok to be at end. + if (offset + 4 > end) { + warnBeyondEof(); + } + float result = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset)); + offset += 4; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return result; + } + + /* + * DOUBLE. + */ + @Override + public double readDouble() throws IOException { + // Last item -- ok to be at end. + if (offset + 8 > end) { + warnBeyondEof(); + } + double result = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset)); + offset += 8; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return result; + } + + /* + * STRING. + * + * Can be used to read CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + + // This class is for internal use. + private class LazyBinaryReadStringResults extends ReadStringResults { + public LazyBinaryReadStringResults() { + super(); + } + } + + // Reading a STRING field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadStringResults createReadStringResults() { + return new LazyBinaryReadStringResults(); + } + + @Override + public void readString(ReadStringResults readStringResults) throws IOException { + // using vint instead of 4 bytes + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Could be last item for empty string -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + int saveStart = offset; + int length = tempVInt.value; + offset += length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + readStringResults.bytes = bytes; + readStringResults.start = saveStart; + readStringResults.length = length; + } + + /* + * CHAR. + */ + + // This class is for internal use. + private static class LazyBinaryReadHiveCharResults extends ReadHiveCharResults { + + // Use our STRING reader. 
+ public LazyBinaryReadStringResults readStringResults; + + public LazyBinaryReadHiveCharResults() { + super(); + } + + public HiveCharWritable getHiveCharWritable() { + return hiveCharWritable; + } + } + + // Reading a CHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different CHAR field. + @Override + public ReadHiveCharResults createReadHiveCharResults() { + return new LazyBinaryReadHiveCharResults(); + } + + public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { + LazyBinaryReadHiveCharResults lazyBinaryReadHiveCharResults = (LazyBinaryReadHiveCharResults) readHiveCharResults; + + if (!lazyBinaryReadHiveCharResults.isInit()) { + lazyBinaryReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + if (lazyBinaryReadHiveCharResults.readStringResults == null) { + lazyBinaryReadHiveCharResults.readStringResults = new LazyBinaryReadStringResults(); + } + LazyBinaryReadStringResults readStringResults = lazyBinaryReadHiveCharResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + // Copy the bytes into our Text object, then truncate. + HiveCharWritable hiveCharWritable = lazyBinaryReadHiveCharResults.getHiveCharWritable(); + hiveCharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); + hiveCharWritable.enforceMaxLength(lazyBinaryReadHiveCharResults.getMaxLength()); + + readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); + readHiveCharResults.start = 0; + readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); + } + + /* + * VARCHAR. + */ + + // This class is for internal use. + private static class LazyBinaryReadHiveVarcharResults extends ReadHiveVarcharResults { + + // Use our STRING reader. + public LazyBinaryReadStringResults readStringResults; + + public LazyBinaryReadHiveVarcharResults() { + super(); + } + + public HiveVarcharWritable getHiveVarcharWritable() { + return hiveVarcharWritable; + } + } + + // Reading a VARCHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different VARCHAR field. + @Override + public ReadHiveVarcharResults createReadHiveVarcharResults() { + return new LazyBinaryReadHiveVarcharResults(); + } + + public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { + LazyBinaryReadHiveVarcharResults lazyBinaryReadHiveVarcharResults = (LazyBinaryReadHiveVarcharResults) readHiveVarcharResults; + + if (!lazyBinaryReadHiveVarcharResults.isInit()) { + lazyBinaryReadHiveVarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + if (lazyBinaryReadHiveVarcharResults.readStringResults == null) { + lazyBinaryReadHiveVarcharResults.readStringResults = new LazyBinaryReadStringResults(); + } + LazyBinaryReadStringResults readStringResults = lazyBinaryReadHiveVarcharResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + // Copy the bytes into our Text object, then truncate. 
+ HiveVarcharWritable hiveVarcharWritable = lazyBinaryReadHiveVarcharResults.getHiveVarcharWritable(); + hiveVarcharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); + hiveVarcharWritable.enforceMaxLength(lazyBinaryReadHiveVarcharResults.getMaxLength()); + + readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); + readHiveVarcharResults.start = 0; + readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); + } + + /* + * BINARY. + */ + + // This class is for internal use. + private class LazyBinaryReadBinaryResults extends ReadBinaryResults { + + // Use our STRING reader. + public LazyBinaryReadStringResults readStringResults; + + public LazyBinaryReadBinaryResults() { + super(); + } + } + + // Reading a BINARY field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadBinaryResults createReadBinaryResults() { + return new LazyBinaryReadBinaryResults(); + } + + public void readBinary(ReadBinaryResults readBinaryResults) throws IOException { + LazyBinaryReadBinaryResults lazyBinaryReadBinaryResults = (LazyBinaryReadBinaryResults) readBinaryResults; + + if (lazyBinaryReadBinaryResults.readStringResults == null) { + lazyBinaryReadBinaryResults.readStringResults = new LazyBinaryReadStringResults(); + } + LazyBinaryReadStringResults readStringResults = lazyBinaryReadBinaryResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + readBinaryResults.bytes = readStringResults.bytes; + readBinaryResults.start = readStringResults.start; + readBinaryResults.length = readStringResults.length; + } + + /* + * DATE. + */ + + // This class is for internal use. + private static class LazyBinaryReadDateResults extends ReadDateResults { + + public LazyBinaryReadDateResults() { + super(); + } + + public DateWritable getDateWritable() { + return dateWritable; + } + } + + // Reading a DATE field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DATE field. + @Override + public ReadDateResults createReadDateResults() { + return new LazyBinaryReadDateResults(); + } + + @Override + public void readDate(ReadDateResults readDateResults) throws IOException { + LazyBinaryReadDateResults lazyBinaryReadDateResults = (LazyBinaryReadDateResults) readDateResults; + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + DateWritable dateWritable = lazyBinaryReadDateResults.getDateWritable(); + dateWritable.set(tempVInt.value); + } + + /* + * INTERVAL_YEAR_MONTH. + */ + + // This class is for internal use. 
+ private static class LazyBinaryReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { + + public LazyBinaryReadIntervalYearMonthResults() { + super(); + } + + public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { + return hiveIntervalYearMonthWritable; + } + } + + // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_YEAR_MONTH field. + @Override + public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { + return new LazyBinaryReadIntervalYearMonthResults(); + } + + @Override + public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) + throws IOException { + LazyBinaryReadIntervalYearMonthResults lazyBinaryReadIntervalYearMonthResults = + (LazyBinaryReadIntervalYearMonthResults) readIntervalYearMonthResults; + + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = + lazyBinaryReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); + hiveIntervalYearMonthWritable.set(tempVInt.value); + } + + /* + * INTERVAL_DAY_TIME. + */ + + // This class is for internal use. + private static class LazyBinaryReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { + + public LazyBinaryReadIntervalDayTimeResults() { + super(); + } + + public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { + return hiveIntervalDayTimeWritable; + } + } + + // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_DAY_TIME field. + @Override + public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { + return new LazyBinaryReadIntervalDayTimeResults(); + } + + @Override + public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) + throws IOException { + LazyBinaryReadIntervalDayTimeResults lazyBinaryReadIntervalDayTimeResults = + (LazyBinaryReadIntervalDayTimeResults) readIntervalDayTimeResults; + LazyBinaryUtils.readVLong(bytes, offset, tempVLong); + offset += tempVLong.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = + lazyBinaryReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); + hiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value); + } + + /* + * TIMESTAMP. + */ + + // This class is for internal use. 
+ private static class LazyBinaryReadTimestampResults extends ReadTimestampResults { + + public LazyBinaryReadTimestampResults() { + super(); + } + + public TimestampWritable getTimestampWritable() { + return timestampWritable; + } + } + + // Reading a TIMESTAMP field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different TIMESTAMP field. + @Override + public ReadTimestampResults createReadTimestampResults() { + return new LazyBinaryReadTimestampResults(); + } + + @Override + public void readTimestamp(ReadTimestampResults readTimestampResults) throws IOException { + LazyBinaryReadTimestampResults lazyBinaryReadTimestampResults = (LazyBinaryReadTimestampResults) readTimestampResults; + int length = TimestampWritable.getTotalLength(bytes, offset); + int saveStart = offset; + offset += length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + TimestampWritable timestampWritable = lazyBinaryReadTimestampResults.getTimestampWritable(); + timestampWritable.set(bytes, saveStart); + } + + /* + * DECIMAL. + */ + + // This class is for internal use. + private static class LazyBinaryReadDecimalResults extends ReadDecimalResults { + + public HiveDecimal hiveDecimal; + + public void init(DecimalTypeInfo decimalTypeInfo) { + super.init(decimalTypeInfo); + } + + @Override + public HiveDecimal getHiveDecimal() { + return hiveDecimal; + } + } + + // Reading a DECIMAL field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DECIMAL field. + @Override + public ReadDecimalResults createReadDecimalResults() { + return new LazyBinaryReadDecimalResults(); + } + + @Override + public void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException { + LazyBinaryReadDecimalResults lazyBinaryReadDecimalResults = (LazyBinaryReadDecimalResults) readDecimalResults; + + if (!lazyBinaryReadDecimalResults.isInit()) { + lazyBinaryReadDecimalResults.init(saveDecimalTypeInfo); + } + + lazyBinaryReadDecimalResults.hiveDecimal = saveDecimal; + + saveDecimal = null; + saveDecimalTypeInfo = null; + } + + /** + * We read the whole HiveDecimal value and then enforce precision and scale, which may + * make it a NULL. + * @return Returns true if this HiveDecimal enforced to a NULL. + */ + private boolean earlyReadHiveDecimal() throws EOFException { + + // Since enforcing precision and scale can cause a HiveDecimal to become NULL, + // we must read it, enforce it here, and either return NULL or buffer the result. + + // These calls are to see how much data there is. The setFromBytes call below will do the same + // readVInt reads but actually unpack the decimal. + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + int saveStart = offset; + offset += tempVInt.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + offset += tempVInt.value; + // Last item -- ok to be at end. 
+ if (offset > end) { + warnBeyondEof(); + } + int length = offset - saveStart; + + if (tempHiveDecimalWritable == null) { + tempHiveDecimalWritable = new HiveDecimalWritable(); + } + tempHiveDecimalWritable.setFromBytes(bytes, saveStart, length); + + saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex]; + + int precision = saveDecimalTypeInfo.getPrecision(); + int scale = saveDecimalTypeInfo.getScale(); + + saveDecimal = tempHiveDecimalWritable.getHiveDecimal(precision, scale); + + // Move past this field whether it is NULL or NOT NULL. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + // Now return whether it is NULL or NOT NULL. + return (saveDecimal == null); + } +} \ No newline at end of file diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java new file mode 100644 index 0000000..8cb2741 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java @@ -0,0 +1,734 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazybinary.fast; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hive.common.util.DateUtils; + +/* + * Directly serialize, field-by-field, the LazyBinary format. +* + * This is an alternative way to serialize than what is provided by LazyBinarySerDe. 
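+ *
+ * A rough calling pattern (sketch only; the field count must match the number of writeXXX calls):
+ *
+ *   LazyBinarySerializeWrite writer = new LazyBinarySerializeWrite(2);
+ *   writer.set(output);          // output is a ByteStream.Output owned by the caller
+ *   writer.writeInt(7);
+ *   writer.writeNull();
+ *   // output now contains the LazyBinary bytes, including the per-8-field NULL bytes.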
+ */ +public class LazyBinarySerializeWrite implements SerializeWrite { + public static final Log LOG = LogFactory.getLog(LazyBinarySerializeWrite.class.getName()); + + private Output output; + + private int fieldCount; + private int fieldIndex; + private byte nullByte; + private long nullOffset; + + // For thread safety, we allocate private writable objects for our use only. + private HiveDecimalWritable hiveDecimalWritable; + private TimestampWritable timestampWritable; + private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; + private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; + private HiveIntervalDayTime hiveIntervalDayTime; + + public LazyBinarySerializeWrite(int fieldCount) { + this(); + this.fieldCount = fieldCount; + } + + // Not public since we must have the field count and other information. + private LazyBinarySerializeWrite() { + } + + /* + * Set the buffer that will receive the serialized data. + */ + @Override + public void set(Output output) { + this.output = output; + output.reset(); + fieldIndex = 0; + nullByte = 0; + nullOffset = 0; + } + + /* + * Reset the previously supplied buffer that will receive the serialized data. + */ + @Override + public void reset() { + output.reset(); + fieldIndex = 0; + nullByte = 0; + nullOffset = 0; + } + + /* + * General Pattern: + * + * // Every 8 fields we write a NULL byte. + * IF ((fieldIndex % 8) == 0), then + * IF (fieldIndex > 0), then + * Write back previous NullByte + * NullByte = 0 + * Remember write position + * Allocate room for next NULL byte. + * + * WHEN NOT NULL: Set bit in NULL byte; Write value. + * OTHERWISE NULL: We do not set a bit in the nullByte when we are writing a null. + * + * Increment fieldIndex + * + * IF (fieldIndex == fieldCount), then + * Write back final NullByte + * + */ + + /* + * Write a NULL field. + */ + @Override + public void writeNull() throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // We DO NOT set a bit in the NULL byte when we are writing a NULL. + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * BOOLEAN. + */ + @Override + public void writeBoolean(boolean v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + output.write((byte) (v ? 1 : 0)); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * BYTE. + */ + @Override + public void writeByte(byte v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. 
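The General Pattern comment above is the key to every write method in this class. The sketch below reproduces just that NULL-byte bookkeeping over a plain List<Byte> standing in for ByteStream.Output, with each field payload reduced to a single marker byte; it is an illustration of the pattern, not code from the patch.

import java.util.ArrayList;
import java.util.List;

public class NullByteWriteSketch {
  private final List<Byte> buf = new ArrayList<Byte>();
  private final int fieldCount;
  private int fieldIndex;
  private int nullOffset;
  private byte nullByte;

  public NullByteWriteSketch(int fieldCount) {
    this.fieldCount = fieldCount;
  }

  private void beforeField() {
    // Every 8 fields: patch back the previous NULL byte and reserve a new one.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        buf.set(nullOffset, nullByte);
        nullByte = 0;
      }
      // Remember where the reserved byte sits. (The patch only updates
      // nullOffset after the first group because its buffer starts at offset 0.)
      nullOffset = buf.size();
      buf.add((byte) 0);
    }
  }

  private void afterField() {
    fieldIndex++;
    if (fieldIndex == fieldCount) {
      // Final patch back once the last field has been written.
      buf.set(nullOffset, nullByte);
    }
  }

  public void writeNull() {
    beforeField();
    // No bit is set and nothing is appended for a NULL field.
    afterField();
  }

  public void writeMarker(byte payload) {
    beforeField();
    nullByte |= 1 << (fieldIndex % 8);  // mark the field NOT NULL
    buf.add(payload);
    afterField();
  }

  // Read-side counterpart: a field is NOT NULL when its bit is set.
  public static boolean isNotNull(byte nullByte, int fieldIndex) {
    return (nullByte & (1 << (fieldIndex % 8))) != 0;
  }

  public static void main(String[] args) {
    NullByteWriteSketch w = new NullByteWriteSketch(3);
    w.writeMarker((byte) 1);
    w.writeNull();
    w.writeMarker((byte) 2);
    System.out.println(w.buf);                  // [5, 1, 2] -> bits 0 and 2 set
    System.out.println(isNotNull((byte) 5, 1)); // false: field 1 was NULL
  }
}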
+ output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + output.write(v); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * SHORT. + */ + @Override + public void writeShort(short v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + output.write((byte) (v >> 8)); + output.write((byte) (v)); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * INT. + */ + @Override + public void writeInt(int v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVInt(output, v); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * LONG. + */ + @Override + public void writeLong(long v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVLong(output, v); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * FLOAT. + */ + @Override + public void writeFloat(float vf) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + int v = Float.floatToIntBits(vf); + output.write((byte) (v >> 24)); + output.write((byte) (v >> 16)); + output.write((byte) (v >> 8)); + output.write((byte) (v)); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * DOUBLE. + */ + @Override + public void writeDouble(double v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. 
+ output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeDouble(output, v); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * STRING. + * + * Can be used to write CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + @Override + public void writeString(byte[] v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + int length = v.length; + LazyBinaryUtils.writeVInt(output, length); + + output.write(v, 0, length); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + @Override + public void writeString(byte[] v, int start, int length) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVInt(output, length); + + output.write(v, start, length); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * CHAR. + */ + @Override + public void writeHiveChar(HiveChar hiveChar) throws IOException { + String string = hiveChar.getStrippedValue(); + byte[] bytes = string.getBytes(); + writeString(bytes); + } + + /* + * VARCHAR. + */ + @Override + public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException { + String string = hiveVarchar.getValue(); + byte[] bytes = string.getBytes(); + writeString(bytes); + } + + /* + * BINARY. + */ + @Override + public void writeBinary(byte[] v) throws IOException { + writeString(v); + } + + @Override + public void writeBinary(byte[] v, int start, int length) throws IOException { + writeString(v, start, length); + } + + /* + * DATE. + */ + @Override + public void writeDate(Date date) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVInt(output, DateWritable.dateToDays(date)); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + // We provide a faster way to write a date without a Date object. + @Override + public void writeDate(int dateAsDays) throws IOException { + + // Every 8 fields we write a NULL byte. 
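Because writeHiveChar goes through the string path with the stripped value, a padded CHAR contributes only its unpadded bytes to the LazyBinary payload. A small illustration of what that means, with an invented class name and values:

import org.apache.hadoop.hive.common.type.HiveChar;

public class CharStripSketch {
  public static void main(String[] args) {
    // A CHAR(5) value pads to its maximum length internally...
    HiveChar c = new HiveChar("ab", 5);
    // ...but only the stripped two bytes become the serialized field payload.
    byte[] payload = c.getStrippedValue().getBytes();
    System.out.println(payload.length);                    // 2
    System.out.println("[" + c.getStrippedValue() + "]");  // [ab]
  }
}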
+ if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVInt(output, dateAsDays); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * TIMESTAMP. + */ + @Override + public void writeTimestamp(Timestamp v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (timestampWritable == null) { + timestampWritable = new TimestampWritable(); + } + timestampWritable.set(v); + timestampWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * INTERVAL_YEAR_MONTH. + */ + @Override + public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveIntervalYearMonthWritable == null) { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + hiveIntervalYearMonthWritable.set(viyt); + hiveIntervalYearMonthWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + @Override + public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveIntervalYearMonthWritable == null) { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + hiveIntervalYearMonthWritable.set(totalMonths); + hiveIntervalYearMonthWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * INTERVAL_DAY_TIME. + */ + @Override + public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. 
+ output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveIntervalDayTimeWritable == null) { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + hiveIntervalDayTimeWritable.set(vidt); + hiveIntervalDayTimeWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + @Override + public void writeHiveIntervalDayTime(long totalNanos) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveIntervalDayTime == null) { + hiveIntervalDayTime = new HiveIntervalDayTime(); + } + if (hiveIntervalDayTimeWritable == null) { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos); + hiveIntervalDayTimeWritable.set(hiveIntervalDayTime); + hiveIntervalDayTimeWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * DECIMAL. + */ + @Override + public void writeHiveDecimal(HiveDecimal v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveDecimalWritable == null) { + hiveDecimalWritable = new HiveDecimalWritable(); + } + hiveDecimalWritable.set(v); + hiveDecimalWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. 
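The writeHiveIntervalDayTime(long totalNanos) overload builds a HiveIntervalDayTime through DateUtils before serializing it. The conversion step in isolation, using only calls that already appear in this patch (the literal value and class name are illustrative):

import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hive.common.util.DateUtils;

public class IntervalFromNanosSketch {
  public static void main(String[] args) {
    HiveIntervalDayTime interval = new HiveIntervalDayTime();
    // 5.5 seconds expressed as a single nanosecond count.
    DateUtils.setIntervalDayTimeTotalNanos(interval, 5500000000L);
    // Prints an interval of 5 seconds and 500000000 nanoseconds.
    System.out.println(interval);
  }
}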
+ output.writeByte(nullOffset, nullByte); + } + } +} \ No newline at end of file diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveDecimalObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveDecimalObjectInspector.java index f650409..e156f4d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveDecimalObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveDecimalObjectInspector.java @@ -29,7 +29,7 @@ public WritableHiveDecimalObjectInspector() { } - protected WritableHiveDecimalObjectInspector(DecimalTypeInfo typeInfo) { + public WritableHiveDecimalObjectInspector(DecimalTypeInfo typeInfo) { super(typeInfo); } diff --git serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java index 3226114..036be4e 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java +++ serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java @@ -30,7 +30,9 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; @@ -104,24 +106,11 @@ public void testLazyBinarySerDe() throws Throwable { int num = 1000; Random r = new Random(1234); MyTestClass rows[] = new MyTestClass[num]; + for (int i = 0; i < num; i++) { - int randField = r.nextInt(12); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, i); - MyTestClass t = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li,ba); + MyTestClass t = new MyTestClass(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); rows[i] = t; } diff --git serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java new file mode 100644 index 0000000..a3472ad --- /dev/null +++ serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java @@ -0,0 +1,373 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Text; + +/** + * TestBinarySortableSerDe. 
+ * + */ +public class VerifyFast { + + public static void verifyDeserializeRead(DeserializeRead deserializeRead, PrimitiveTypeInfo primitiveTypeInfo, Object object) throws IOException { + + boolean isNull; + + isNull = deserializeRead.readCheckNull(); + if (isNull) { + if (object != null) { + TestCase.fail("Field reports null but object is not null"); + } + return; + } else if (object == null) { + TestCase.fail("Field report not null but object is null"); + } + switch (primitiveTypeInfo.getPrimitiveCategory()) { + case BOOLEAN: + { + boolean value = deserializeRead.readBoolean(); + if (!(object instanceof Boolean)) { + TestCase.fail("Boolean expected object not Boolean"); + } + Boolean expected = (Boolean) object; + if (value != expected) { + TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case BYTE: + { + byte value = deserializeRead.readByte(); + if (!(object instanceof Byte)) { + TestCase.fail("Byte expected object not Byte"); + } + Byte expected = (Byte) object; + if (value != expected) { + TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")"); + } + } + break; + case SHORT: + { + short value = deserializeRead.readShort(); + if (!(object instanceof Short)) { + TestCase.fail("Short expected object not Short"); + } + Short expected = (Short) object; + if (value != expected) { + TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case INT: + { + int value = deserializeRead.readInt(); + if (!(object instanceof Integer)) { + TestCase.fail("Integer expected object not Integer"); + } + Integer expected = (Integer) object; + if (value != expected) { + TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case LONG: + { + long value = deserializeRead.readLong(); + if (!(object instanceof Long)) { + TestCase.fail("Long expected object not Long"); + } + Long expected = (Long) object; + if (value != expected) { + TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case FLOAT: + { + float value = deserializeRead.readFloat(); + Float expected = (Float) object; + if (!(object instanceof Float)) { + TestCase.fail("Float expected object not Float"); + } + if (value != expected) { + TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case DOUBLE: + { + double value = deserializeRead.readDouble(); + Double expected = (Double) object; + if (!(object instanceof Double)) { + TestCase.fail("Double expected object not Double"); + } + if (value != expected) { + TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case STRING: + { + DeserializeRead.ReadStringResults readStringResults = deserializeRead.createReadStringResults(); + deserializeRead.readString(readStringResults); + byte[] stringBytes = Arrays.copyOfRange(readStringResults.bytes, readStringResults.start, readStringResults.start + readStringResults.length); + Text text = new Text(stringBytes); + String string = text.toString(); + String expected = (String) object; + if (!string.equals(expected)) { + TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')"); + } + } + break; + case CHAR: + { + DeserializeRead.ReadHiveCharResults readHiveCharResults = deserializeRead.createReadHiveCharResults(); + deserializeRead.readHiveChar(readHiveCharResults); + 
HiveChar hiveChar = readHiveCharResults.getHiveChar(); + HiveChar expected = (HiveChar) object; + if (!hiveChar.equals(expected)) { + TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')"); + } + } + break; + case VARCHAR: + { + DeserializeRead.ReadHiveVarcharResults readHiveVarcharResults = deserializeRead.createReadHiveVarcharResults(); + deserializeRead.readHiveVarchar(readHiveVarcharResults); + HiveVarchar hiveVarchar = readHiveVarcharResults.getHiveVarchar(); + HiveVarchar expected = (HiveVarchar) object; + if (!hiveVarchar.equals(expected)) { + TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')"); + } + } + break; + case DECIMAL: + { + DeserializeRead.ReadDecimalResults readDecimalResults = deserializeRead.createReadDecimalResults(); + deserializeRead.readHiveDecimal(readDecimalResults); + HiveDecimal value = readDecimalResults.getHiveDecimal(); + if (value == null) { + TestCase.fail("Decimal field evaluated to NULL"); + } + HiveDecimal expected = (HiveDecimal) object; + if (!value.equals(expected)) { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; + int precision = decimalTypeInfo.getPrecision(); + int scale = decimalTypeInfo.getScale(); + TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale); + } + } + break; + case DATE: + { + DeserializeRead.ReadDateResults readDateResults = deserializeRead.createReadDateResults(); + deserializeRead.readDate(readDateResults); + Date value = readDateResults.getDate(); + Date expected = (Date) object; + if (!value.equals(expected)) { + TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case TIMESTAMP: + { + DeserializeRead.ReadTimestampResults readTimestampResults = deserializeRead.createReadTimestampResults(); + deserializeRead.readTimestamp(readTimestampResults); + Timestamp value = readTimestampResults.getTimestamp(); + Timestamp expected = (Timestamp) object; + if (!value.equals(expected)) { + TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case INTERVAL_YEAR_MONTH: + { + DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); + deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); + HiveIntervalYearMonth value = readIntervalYearMonthResults.getHiveIntervalYearMonth(); + HiveIntervalYearMonth expected = (HiveIntervalYearMonth) object; + if (!value.equals(expected)) { + TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case INTERVAL_DAY_TIME: + { + DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); + deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); + HiveIntervalDayTime value = readIntervalDayTimeResults.getHiveIntervalDayTime(); + HiveIntervalDayTime expected = (HiveIntervalDayTime) object; + if (!value.equals(expected)) { + TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case BINARY: + { + DeserializeRead.ReadBinaryResults readBinaryResults = deserializeRead.createReadBinaryResults(); + 
deserializeRead.readBinary(readBinaryResults); + byte[] byteArray = Arrays.copyOfRange(readBinaryResults.bytes, readBinaryResults.start, readBinaryResults.start + readBinaryResults.length); + byte[] expected = (byte[]) object; + if (byteArray.length != expected.length){ + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); + } + for (int b = 0; b < byteArray.length; b++) { + if (byteArray[b] != expected[b]) { + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); + } + } + } + break; + default: + throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory()); + } + } + + public static void serializeWrite(SerializeWrite serializeWrite, PrimitiveCategory primitiveCategory, Object object) throws IOException { + if (object == null) { + serializeWrite.writeNull(); + return; + } + switch (primitiveCategory) { + case BOOLEAN: + { + boolean value = (Boolean) object; + serializeWrite.writeBoolean(value); + } + break; + case BYTE: + { + byte value = (Byte) object; + serializeWrite.writeByte(value); + } + break; + case SHORT: + { + short value = (Short) object; + serializeWrite.writeShort(value); + } + break; + case INT: + { + int value = (Integer) object; + serializeWrite.writeInt(value); + } + break; + case LONG: + { + long value = (Long) object; + serializeWrite.writeLong(value); + } + break; + case FLOAT: + { + float value = (Float) object; + serializeWrite.writeFloat(value); + } + break; + case DOUBLE: + { + double value = (Double) object; + serializeWrite.writeDouble(value); + } + break; + case STRING: + { + String value = (String) object; + byte[] stringBytes = value.getBytes(); + int stringLength = stringBytes.length; + serializeWrite.writeString(stringBytes, 0, stringLength); + } + break; + case CHAR: + { + HiveChar value = (HiveChar) object; + serializeWrite.writeHiveChar(value); + } + break; + case VARCHAR: + { + HiveVarchar value = (HiveVarchar) object; + serializeWrite.writeHiveVarchar(value); + } + break; + case DECIMAL: + { + HiveDecimal value = (HiveDecimal) object; + serializeWrite.writeHiveDecimal(value); + } + break; + case DATE: + { + Date value = (Date) object; + serializeWrite.writeDate(value); + } + break; + case TIMESTAMP: + { + Timestamp value = (Timestamp) object; + serializeWrite.writeTimestamp(value); + } + break; + case INTERVAL_YEAR_MONTH: + { + HiveIntervalYearMonth value = (HiveIntervalYearMonth) object; + serializeWrite.writeHiveIntervalYearMonth(value); + } + break; + case INTERVAL_DAY_TIME: + { + HiveIntervalDayTime value = (HiveIntervalDayTime) object; + serializeWrite.writeHiveIntervalDayTime(value); + } + break; + case BINARY: + { + byte[] binaryBytes = (byte[]) object; + int length = binaryBytes.length; + serializeWrite.writeBinary(binaryBytes, 0, length); + } + break; + default: + throw new Error("Unknown primitive category " + primitiveCategory.name()); + } + } +} \ No newline at end of file diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java index d1d5760..370e857 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java @@ -18,40 +18,82 @@ package org.apache.hadoop.hive.serde2.binarysortable; import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; import java.util.List; +import java.util.Random; +import 
org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; public class MyTestClass { - Byte myByte; - Short myShort; - Integer myInt; - Long myLong; - Float myFloat; - Double myDouble; - String myString; - HiveDecimal myDecimal; - Date myDate; - MyTestInnerStruct myStruct; - List myList; - byte[] myBA; + + public Boolean myBool; + public Byte myByte; + public Short myShort; + public Integer myInt; + public Long myLong; + public Float myFloat; + public Double myDouble; + public String myString; + public HiveChar myHiveChar; + public HiveVarchar myHiveVarchar; + public byte[] myBinary; + public HiveDecimal myDecimal; + public Date myDate; + public Timestamp myTimestamp; + public HiveIntervalYearMonth myIntervalYearMonth; + public HiveIntervalDayTime myIntervalDayTime; + + // Add more complex types. + public MyTestInnerStruct myStruct; + public List myList; public MyTestClass() { } - public MyTestClass(Byte b, Short s, Integer i, Long l, Float f, Double d, - String st, HiveDecimal bd, Date date, MyTestInnerStruct is, List li, byte[] ba) { - myByte = b; - myShort = s; - myInt = i; - myLong = l; - myFloat = f; - myDouble = d; - myString = st; - myDecimal = bd; - myDate = date; - myStruct = is; - myList = li; - myBA = ba; + public final static int fieldCount = 18; + + public int randomFill(Random r, ExtraTypeInfo extraTypeInfo) { + int randField = r.nextInt(MyTestClass.fieldCount); + int field = 0; + + myBool = (randField == field++) ? null : (r.nextInt(1) == 1); + myByte = (randField == field++) ? null : Byte.valueOf((byte) r.nextInt()); + myShort = (randField == field++) ? null : Short.valueOf((short) r.nextInt()); + myInt = (randField == field++) ? null : Integer.valueOf(r.nextInt()); + myLong = (randField == field++) ? null : Long.valueOf(r.nextLong()); + myFloat = (randField == field++) ? null : Float + .valueOf(r.nextFloat() * 10 - 5); + myDouble = (randField == field++) ? null : Double + .valueOf(r.nextDouble() * 10 - 5); + myString = (randField == field++) ? null : MyTestPrimitiveClass.getRandString(r); + myHiveChar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveChar(r, extraTypeInfo); + myHiveVarchar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveVarchar(r, extraTypeInfo); + myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000)); + myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo); + myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r); + myTimestamp = (randField == field++) ? null : MyTestPrimitiveClass.getRandTimestamp(r); + myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r); + myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r); + + myStruct = (randField == field++) ? null : new MyTestInnerStruct( + r.nextInt(5) - 2, r.nextInt(5) - 2); + myList = (randField == field++) ? 
null : getRandIntegerArray(r); + return field; + } + + public static List getRandIntegerArray(Random r) { + int length = r.nextInt(10); + ArrayList result = new ArrayList(length); + for (int i = 0; i < length; i++) { + result.add(r.nextInt(128)); + } + return result; } + } diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java new file mode 100644 index 0000000..2e2327b --- /dev/null +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java @@ -0,0 +1,453 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.binarysortable; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveBaseChar; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Writable; +import org.apache.hive.common.util.DateUtils; + +// Just the primitive types. 
+public class MyTestPrimitiveClass { + + public Boolean myBool; + public Byte myByte; + public Short myShort; + public Integer myInt; + public Long myLong; + public Float myFloat; + public Double myDouble; + public String myString; + public HiveChar myHiveChar; + public HiveVarchar myHiveVarchar; + public byte[] myBinary; + public HiveDecimal myDecimal; + public Date myDate; + public Timestamp myTimestamp; + public HiveIntervalYearMonth myIntervalYearMonth; + public HiveIntervalDayTime myIntervalDayTime; + + public MyTestPrimitiveClass() { + } + + public final static int primitiveCount = 16; + + public int randomFill(Random r, ExtraTypeInfo extraTypeInfo) { + int randField = r.nextInt(primitiveCount); + int field = 0; + return randomFill(r, randField, field, extraTypeInfo); + } + + public boolean chooseNull(Random r, int randField, int field) { + if (randField == field) { + return true; + } + return (r.nextInt(5) == 0); + } + + public int randomFill(Random r, int randField, int field, ExtraTypeInfo extraTypeInfo) { + myBool = chooseNull(r, randField, field++) ? null : Boolean.valueOf(r.nextInt(1) == 1); + myByte = chooseNull(r, randField, field++) ? null : Byte.valueOf((byte) r.nextInt()); + myShort = chooseNull(r, randField, field++) ? null : Short.valueOf((short) r.nextInt()); + myInt = chooseNull(r, randField, field++) ? null : Integer.valueOf(r.nextInt()); + myLong = chooseNull(r, randField, field++) ? null : Long.valueOf(r.nextLong()); + myFloat = chooseNull(r, randField, field++) ? null : Float + .valueOf(r.nextFloat() * 10 - 5); + myDouble = chooseNull(r, randField, field++) ? null : Double + .valueOf(r.nextDouble() * 10 - 5); + myString = chooseNull(r, randField, field++) ? null : getRandString(r); + myHiveChar = chooseNull(r, randField, field++) ? null : getRandHiveChar(r, extraTypeInfo); + myHiveVarchar = chooseNull(r, randField, field++) ? null : getRandHiveVarchar(r, extraTypeInfo); + myBinary = getRandBinary(r, r.nextInt(1000)); + myDecimal = chooseNull(r, randField, field++) ? null : getRandHiveDecimal(r, extraTypeInfo); + myDate = chooseNull(r, randField, field++) ? null : getRandDate(r); + myTimestamp = chooseNull(r, randField, field++) ? null : getRandTimestamp(r); + myIntervalYearMonth = chooseNull(r, randField, field++) ? null : getRandIntervalYearMonth(r); + myIntervalDayTime = chooseNull(r, randField, field++) ? null : getRandIntervalDayTime(r); + return field; + } + + public static class ExtraTypeInfo { + public int hiveCharMaxLength; + public int hiveVarcharMaxLength; + public int precision; + public int scale; + + public ExtraTypeInfo() { + // For NULL fields, make up a valid max length. 
+ hiveCharMaxLength = 1; + hiveVarcharMaxLength = 1; + precision = HiveDecimal.SYSTEM_DEFAULT_PRECISION; + scale = HiveDecimal.SYSTEM_DEFAULT_SCALE; + } + } + + public static PrimitiveTypeInfo[] getPrimitiveTypeInfos(ExtraTypeInfo extraTypeInfo) { + PrimitiveTypeInfo[] primitiveTypeInfos = new PrimitiveTypeInfo[primitiveCount]; + for (int i = 0; i < primitiveCount; i++) { + primitiveTypeInfos[i] = getPrimitiveTypeInfo(i, extraTypeInfo); + } + return primitiveTypeInfos; + } + + public static String getRandString(Random r) { + return getRandString(r, null, r.nextInt(10)); + } + + public static String getRandString(Random r, String characters, int length) { + if (characters == null) { + characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < length; i++) { + if (characters == null) { + sb.append((char) (r.nextInt(128))); + } else { + sb.append(characters.charAt(r.nextInt(characters.length()))); + } + } + return sb.toString(); + } + + public static HiveChar getRandHiveChar(Random r, ExtraTypeInfo extraTypeInfo) { + int maxLength = 10 + r.nextInt(60); + extraTypeInfo.hiveCharMaxLength = maxLength; + String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveChar hiveChar = new HiveChar(randomString, maxLength); + return hiveChar; + } + + public static HiveVarchar getRandHiveVarchar(Random r, ExtraTypeInfo extraTypeInfo) { + int maxLength = 10 + r.nextInt(60); + extraTypeInfo.hiveVarcharMaxLength = maxLength; + String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength); + return hiveVarchar; + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static HiveDecimal getRandHiveDecimal(Random r, ExtraTypeInfo extraTypeInfo) { + while (true) { + StringBuilder sb = new StringBuilder(); + int precision = 1 + r.nextInt(18); + int scale = 0 + r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + extraTypeInfo.precision = bd.precision(); + extraTypeInfo.scale = bd.scale(); + if (extraTypeInfo.scale > extraTypeInfo.precision) { + // Sometimes weird decimals are produced? + continue; + } + + // For now, punt. 
+ extraTypeInfo.precision = HiveDecimal.SYSTEM_DEFAULT_PRECISION; + extraTypeInfo.scale = HiveDecimal.SYSTEM_DEFAULT_SCALE; + return bd; + } + } + + public static Date getRandDate(Random r) { + String dateStr = String.format("%d-%02d-%02d", + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28))); // day + Date dateVal = Date.valueOf(dateStr); + return dateVal; + } + + public static Timestamp getRandTimestamp(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String timestampStr = String.format("%d-%02d-%02d %02d:%02d:%02d%s", + Integer.valueOf(1970 + r.nextInt(200)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + Timestamp timestampVal = Timestamp.valueOf(timestampStr); + return timestampVal; + } + + public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String intervalYearMonthStr = String.format("%s%d-%d", + yearMonthSignStr, + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(0 + r.nextInt(12))); // month + HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); + TestCase.assertTrue(intervalYearMonthVal != null); + return intervalYearMonthVal; + } + + public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", + yearMonthSignStr, + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); + TestCase.assertTrue(intervalDayTimeVal != null); + return intervalDayTimeVal; + } + + public Object getPrimitiveObject(int index) { + int field = 0; + if (index == field++) { + return myBool; + } else if (index == field++) { + return myByte; + } else if (index == field++) { + return myShort; + } else if (index == field++) { + return myInt; + } else if (index == field++) { + return myLong; + } else if (index == field++) { + return myFloat; + } else if (index == field++) { + return myDouble; + } else if (index == field++) { + return myString; + } else if (index == field++) { + return myHiveChar; + } else if (index == field++) { + return myHiveVarchar; + } else if (index == field++) { + return myBinary; + } else if (index == field++) { + return myDecimal; + } else if (index == field++) { + return myDate; + } else if (index == field++) { + return myTimestamp; + } else if (index == field++) { + return myIntervalYearMonth; + } else if (index == field++) { + return myIntervalDayTime; + } else { + throw new Error("Field " + " field not handled"); + } + } + + public Object getPrimitiveWritableObject(int index, PrimitiveTypeInfo primitiveTypeInfo) { + int field = 0; + if (index == field++) { + return (myBool == null ? 
null : PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.create((boolean) myBool)); + } else if (index == field++) { + return (myByte == null ? null : PrimitiveObjectInspectorFactory.writableByteObjectInspector.create((byte) myByte)); + } else if (index == field++) { + return (myShort == null ? null : PrimitiveObjectInspectorFactory.writableShortObjectInspector.create((short) myShort)); + } else if (index == field++) { + return (myInt == null ? null : PrimitiveObjectInspectorFactory.writableIntObjectInspector.create((int) myInt)); + } else if (index == field++) { + return (myLong == null ? null : PrimitiveObjectInspectorFactory.writableLongObjectInspector.create((long) myLong)); + } else if (index == field++) { + return (myFloat == null ? null : PrimitiveObjectInspectorFactory.writableFloatObjectInspector.create((float) myFloat)); + } else if (index == field++) { + return (myDouble == null ? null : PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.create((double) myDouble)); + } else if (index == field++) { + return (myString == null ? null : PrimitiveObjectInspectorFactory.writableStringObjectInspector.create(myString)); + } else if (index == field++) { + if (myHiveChar == null) { + return null; + } + CharTypeInfo charTypeInfo = (CharTypeInfo) primitiveTypeInfo; + WritableHiveCharObjectInspector writableCharObjectInspector = new WritableHiveCharObjectInspector(charTypeInfo); + return writableCharObjectInspector.create(myHiveChar); + } else if (index == field++) { + if (myHiveVarchar == null) { + return null; + } + VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) primitiveTypeInfo; + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = new WritableHiveVarcharObjectInspector(varcharTypeInfo); + return writableVarcharObjectInspector.create(myHiveVarchar); + } else if (index == field++) { + return (myBinary == null ? null : PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(myBinary)); + } else if (index == field++) { + if (myDecimal == null) { + return null; + } + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = new WritableHiveDecimalObjectInspector(decimalTypeInfo); + return writableDecimalObjectInspector.create(myDecimal); + } else if (index == field++) { + return (myDate == null ? null : PrimitiveObjectInspectorFactory.writableDateObjectInspector.create(myDate)); + } else if (index == field++) { + return (myTimestamp == null ? null : PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.create(myTimestamp)); + } else if (index == field++) { + return (myIntervalYearMonth == null ? null : PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.create(myIntervalYearMonth)); + } else if (index == field++) { + return (myIntervalDayTime == null ? 
null : PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.create(myIntervalDayTime)); + } else { + throw new Error("Field " + " field not handled"); + } + } + + + public static PrimitiveCategory getPrimitiveCategory(int index) { + int field = 0; + if (index == field++) { + return PrimitiveCategory.BOOLEAN; + } else if (index == field++) { + return PrimitiveCategory.BYTE; + } else if (index == field++) { + return PrimitiveCategory.SHORT; + } else if (index == field++) { + return PrimitiveCategory.INT; + } else if (index == field++) { + return PrimitiveCategory.LONG; + } else if (index == field++) { + return PrimitiveCategory.FLOAT; + } else if (index == field++) { + return PrimitiveCategory.DOUBLE; + } else if (index == field++) { + return PrimitiveCategory.STRING; + } else if (index == field++) { + return PrimitiveCategory.CHAR; + } else if (index == field++) { + return PrimitiveCategory.VARCHAR; + } else if (index == field++) { + return PrimitiveCategory.BINARY; + } else if (index == field++) { + return PrimitiveCategory.DECIMAL; + } else if (index == field++) { + return PrimitiveCategory.DATE; + } else if (index == field++) { + return PrimitiveCategory.TIMESTAMP; + } else if (index == field++) { + return PrimitiveCategory.INTERVAL_YEAR_MONTH; + } else if (index == field++) { + return PrimitiveCategory.INTERVAL_DAY_TIME; + } else { + throw new Error("Field " + " field not handled"); + } + } + + public static PrimitiveTypeInfo getPrimitiveTypeInfo(int index, ExtraTypeInfo extraTypeInfo) { + PrimitiveCategory primitiveCategory = getPrimitiveCategory(index); + String typeName; + switch (primitiveCategory) { + case BYTE: + typeName = "tinyint"; + break; + case SHORT: + typeName = "smallint"; + break; + case LONG: + typeName = "bigint"; + break; + case CHAR: + typeName = String.format("char(%d)", extraTypeInfo.hiveCharMaxLength); + break; + case VARCHAR: + typeName = String.format("varchar(%d)", extraTypeInfo.hiveVarcharMaxLength); + break; + case DECIMAL: + typeName = String.format("decimal(%d,%d)", extraTypeInfo.precision, extraTypeInfo.scale); + break; + default: + // No type name difference or adornment. 
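getPrimitiveTypeInfo builds parameterized type-name strings such as char(n), varchar(n) and decimal(p,s) and resolves them through TypeInfoUtils. A quick illustration of that resolution for a decimal type; the class name and literals are invented for the example:

import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeNameResolutionSketch {
  public static void main(String[] args) {
    PrimitiveTypeInfo typeInfo =
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("decimal(10,2)");
    DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
    // Prints the precision and scale carried by the parameterized type name.
    System.out.println(decimalTypeInfo.getPrecision() + "," + decimalTypeInfo.getScale());
  }
}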
+ typeName = primitiveCategory.name().toLowerCase(); + break; + } + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + return primitiveTypeInfo; + } + + public StructObjectInspector getRowInspector(PrimitiveTypeInfo[] primitiveTypeInfos) { + List columnNames = new ArrayList(primitiveCount); + List primitiveObjectInspectorList = new ArrayList(primitiveCount); + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + columnNames.add(String.format("col%d", index)); + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveCategory)); + } + StandardStructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); + return rowOI; + } +} diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java new file mode 100644 index 0000000..83d3839 --- /dev/null +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.binarysortable; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.io.BytesWritable; + +public class TestBinarySortableFast extends TestCase { + + private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, + boolean[] columnSortOrderIsDesc, SerDe serde, StructObjectInspector rowOI, boolean ascending, + Map primitiveTypeInfoMap) throws Throwable { + + BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc); + + // Try to serialize + + // One Writable per row. + BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + + int[][] perFieldWriteLengthsArray = new int[myTestPrimitiveClasses.length][]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + Output output = new Output(); + binarySortableSerializeWrite.set(output); + + int[] perFieldWriteLengths = new int[MyTestPrimitiveClass.primitiveCount]; + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveCategory, object); + perFieldWriteLengths[index] = output.getLength(); + } + perFieldWriteLengthsArray[i] = perFieldWriteLengths; + + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(output.getData(), 0, output.getLength()); + serializeWriteBytes[i] = bytesWritable; + if (i > 0) { + int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]); + if ((compareResult < 0 && !ascending) + || (compareResult > 0 && ascending)) { + System.out.println("Test failed in " + + (ascending ? "ascending" : "descending") + " order with " + + (i - 1) + " and " + i); + System.out.println("serialized data [" + (i - 1) + "] = " + + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1])); + System.out.println("serialized data [" + i + "] = " + + TestBinarySortableSerDe.hexString(serializeWriteBytes[i])); + fail("Sort order of serialized " + (i - 1) + " and " + i + + " are reversed!"); + } + } + } + + + // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite. 
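Condensed to a single column and row, the SerializeWrite/DeserializeRead round trip that the following loops exercise looks roughly like this; the class name and the int value are invented, while the constructors and method calls are the ones used in this test.

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.VerifyFast;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class SingleFieldRoundTripSketch {
  public static void main(String[] args) throws Exception {
    boolean[] columnSortOrderIsDesc = new boolean[] { false };
    PrimitiveTypeInfo[] typeInfos = new PrimitiveTypeInfo[] {
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("int") };
    Object value = Integer.valueOf(42);

    // Serialize the single field into an Output buffer.
    BinarySortableSerializeWrite serializeWrite =
        new BinarySortableSerializeWrite(columnSortOrderIsDesc);
    Output output = new Output();
    serializeWrite.set(output);
    VerifyFast.serializeWrite(serializeWrite, PrimitiveCategory.INT, value);

    // Deserialize it back and compare against the original object.
    BinarySortableDeserializeRead deserializeRead =
        new BinarySortableDeserializeRead(typeInfos, columnSortOrderIsDesc);
    deserializeRead.set(output.getData(), 0, output.getLength());
    VerifyFast.verifyDeserializeRead(deserializeRead, typeInfos[0], value);
  }
}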
+ for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); + BinarySortableDeserializeRead binarySortableDeserializeRead = + new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + + BytesWritable bytesWritable = serializeWriteBytes[i]; + binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object); + } + binarySortableDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned()); + } + + // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + BytesWritable bytesWritable = serializeWriteBytes[i]; + List deserializedRow = (List) serde.deserialize(bytesWritable); + + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); + Object object = deserializedRow.get(index); + if (expected == null || object == null) { + if (expected != null || object != null) { + fail("SerDe deserialized NULL column mismatch"); + } + } else { + if (!object.equals(expected)) { + fail("SerDe deserialized value does not match"); + } + } + } + } + + // One Writable per row. + BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + + // Serialize using the SerDe, then below deserialize using DeserializeRead. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + + // Since SerDe reuses memory, we will need to make a copy. 
+ BytesWritable serialized = (BytesWritable) serde.serialize(t, rowOI); + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(serialized); + byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + byte[] serializeWriteExpected = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); + if (!Arrays.equals(serDeOutput, serializeWriteExpected)) { + int mismatchPos = -1; + if (serDeOutput.length != serializeWriteExpected.length) { + for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) { + if (serDeOutput[b] != serializeWriteExpected[b]) { + mismatchPos = b; + break; + } + } + fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length + ", serializeWriteExpected.length " + serializeWriteExpected.length + + " mismatchPos " + mismatchPos + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i])); + } + for (int b = 0; b < serDeOutput.length; b++) { + if (serDeOutput[b] != serializeWriteExpected[b]) { + fail("SerializeWrite and SerDe serialization does not match at position " + b); + } + } + } + serdeBytes[i] = bytesWritable; + } + + // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); + BinarySortableDeserializeRead binarySortableDeserializeRead = + new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + + BytesWritable bytesWritable = serdeBytes[i]; + binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object); + } + binarySortableDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned()); + } + } + + public void testBinarySortableFast() throws Throwable { + try { + + int num = 1000; + Random r = new Random(1234); + MyTestPrimitiveClass myTestPrimitiveClasses[] = new MyTestPrimitiveClass[num]; + // Need a map because we sort. 
+ Map primitiveTypeInfoMap = new HashMap(); + + for (int i = 0; i < num; i++) { + int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); + MyTestPrimitiveClass t = new MyTestPrimitiveClass(); + int field = 0; + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, randField, field, extraTypeInfo); + myTestPrimitiveClasses[i] = t; + PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); + primitiveTypeInfoMap.put(t, primitiveTypeInfos); + } + + StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory + .getReflectionObjectInspector(MyTestPrimitiveClass.class, + ObjectInspectorOptions.JAVA); + + TestBinarySortableSerDe.sort(myTestPrimitiveClasses, rowOI); + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + String order; + order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '+'); + SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order); + order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '-'); + SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order); + + boolean[] columnSortOrderIsDesc = new boolean[MyTestPrimitiveClass.primitiveCount]; + Arrays.fill(columnSortOrderIsDesc, false); + testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, serde_ascending, rowOI, true, primitiveTypeInfoMap); + Arrays.fill(columnSortOrderIsDesc, true); + testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, serde_descending, rowOI, false, primitiveTypeInfoMap); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java index cefb72e..b3fb3be 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java @@ -26,11 +26,13 @@ import junit.framework.TestCase; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; @@ -64,7 +66,7 @@ public static String hexString(BytesWritable bytes) { return sb.toString(); } - private SerDe getSerDe(String fieldNames, String fieldTypes, String order) + public static SerDe getSerDe(String fieldNames, String fieldTypes, String order) throws Throwable { Properties schema = new Properties(); schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames); @@ -124,7 +126,7 @@ private void testBinarySortableSerDe(Object[] rows, ObjectInspector rowOI, } } - private void sort(Object[] structs, ObjectInspector oi) { + public static void sort(Object[] structs, ObjectInspector oi) { for (int i = 0; i < structs.length; i++) { for (int j = i + 1; j < structs.length; j++) { if 
(ObjectInspectorUtils.compare(structs[i], oi, structs[j], oi) > 0) { @@ -136,66 +138,6 @@ private void sort(Object[] structs, ObjectInspector oi) { } } - public static HiveDecimal getRandHiveDecimal(Random r) { - StringBuilder sb = new StringBuilder(); - int l1 = 1+r.nextInt(18), l2 = r.nextInt(19); - - if (r.nextBoolean()) { - sb.append("-"); - } - - sb.append(getRandString(r, DECIMAL_CHARS, l1)); - if (l2 != 0) { - sb.append("."); - sb.append(getRandString(r, DECIMAL_CHARS, l2)); - } - - HiveDecimal bd = HiveDecimal.create(sb.toString()); - return bd; - } - - public static Date getRandDate(Random r) { - String dateStr = String.format("%d-%02d-%02d", - Integer.valueOf(1800 + r.nextInt(500)), // year - Integer.valueOf(1 + r.nextInt(12)), // month - Integer.valueOf(1 + r.nextInt(28))); // day - Date dateVal = Date.valueOf(dateStr); - return dateVal; - } - - public static String getRandString(Random r) { - return getRandString(r, null, r.nextInt(10)); - } - - public static String getRandString(Random r, String characters, int length) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < length; i++) { - if (characters == null) { - sb.append((char) (r.nextInt(128))); - } else { - sb.append(characters.charAt(r.nextInt(characters.length()))); - } - } - return sb.toString(); - } - - public static List getRandIntegerArray(Random r) { - int length = r.nextInt(10); - ArrayList result = new ArrayList(length); - for (int i = 0; i < length; i++) { - result.add(r.nextInt(128)); - } - return result; - } - - public static byte[] getRandBA(Random r, int len){ - byte[] bytes = new byte[len]; - for (int j = 0; j < len; j++){ - bytes[j] = Byte.valueOf((byte) r.nextInt()); - } - return bytes; - } - public void testBinarySortableSerDe() throws Throwable { try { @@ -206,23 +148,9 @@ public void testBinarySortableSerDe() throws Throwable { MyTestClass rows[] = new MyTestClass[num]; for (int i = 0; i < num; i++) { - int randField = r.nextInt(11); MyTestClass t = new MyTestClass(); - t.myByte = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - t.myShort = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - t.myInt = randField > 2 ? null : Integer.valueOf(r.nextInt()); - t.myLong = randField > 3 ? null : Long.valueOf(r.nextLong()); - t.myFloat = randField > 4 ? null : Float - .valueOf(r.nextFloat() * 10 - 5); - t.myDouble = randField > 5 ? null : Double - .valueOf(r.nextDouble() * 10 - 5); - t.myString = randField > 6 ? null : getRandString(r); - t.myDecimal = randField > 7 ? null : getRandHiveDecimal(r); - t.myDate = randField > 8 ? null : getRandDate(r); - t.myStruct = randField > 9 ? null : new MyTestInnerStruct( - r.nextInt(5) - 2, r.nextInt(5) - 2); - t.myList = randField > 10 ? 
null : getRandIntegerArray(r); - t.myBA = getRandBA(r, i); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); rows[i] = t; } @@ -234,10 +162,13 @@ public void testBinarySortableSerDe() throws Throwable { String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + String order; + order = StringUtils.leftPad("", MyTestClass.fieldCount, '+'); testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes, - "++++++++++++"), true); + order), true); + order = StringUtils.leftPad("", MyTestClass.fieldCount, '-'); testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes, - "------------"), false); + order), false); System.out.println("Test testTBinarySortableProtocol passed!"); } catch (Throwable e) { diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java new file mode 100644 index 0000000..46d9f54 --- /dev/null +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java @@ -0,0 +1,261 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.lazy; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; + +public class TestLazySimpleFast extends TestCase { + + private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, LazySimpleSerDe[] serdes, + StructObjectInspector[] rowOIs, byte separator, LazySerDeParameters[] serdeParams, + PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable { + + + // Try to serialize + BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + Output output = new Output(); + + LazySimpleSerializeWrite lazySimpleSerializeWrite = + new LazySimpleSerializeWrite(MyTestPrimitiveClass.primitiveCount, + separator, serdeParams[i]); + + lazySimpleSerializeWrite.set(output); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveCategory, object); + } + + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(output.getData(), 0, output.getLength()); + serializeWriteBytes[i] = bytesWritable; + } + + // Try to deserialize + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(primitiveTypeInfos, + separator, serdeParams[i]); + + BytesWritable bytesWritable = serializeWriteBytes[i]; + byte[] bytes = bytesWritable.getBytes(); + int 
length = bytesWritable.getLength(); + lazySimpleDeserializeRead.set(bytes, 0, length); + + char[] chars = new char[length]; + for (int c = 0; c < chars.length; c++) { + chars[c] = (char) (bytes[c] & 0xFF); + } + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object); + } + lazySimpleDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned()); + } + + // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + BytesWritable bytesWritable = serializeWriteBytes[i]; + LazyStruct lazySimpleStruct = (LazyStruct) serdes[i].deserialize(bytesWritable); + + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo); + LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index); + Object object; + if (lazyPrimitive != null) { + object = lazyPrimitive.getWritableObject(); + } else { + object = null; + } + if (expected == null || object == null) { + if (expected != null || object != null) { + fail("SerDe deserialized NULL column mismatch"); + } + } else { + if (!object.equals(expected)) { + fail("SerDe deserialized value does not match"); + } + } + } + } + + // One Writable per row. + byte[][] serdeBytes = new byte[myTestPrimitiveClasses.length][]; + + // Serialize using the SerDe, then below deserialize using DeserializeRead. + Object[] row = new Object[MyTestPrimitiveClass.primitiveCount]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + + // LazySimple seems to work better with an row object array instead of a Java object... + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); + row[index] = object; + } + + Text serialized = (Text) serdes[i].serialize(row, rowOIs[i]); + byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()); + + byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); + if (!Arrays.equals(bytes1, bytes2)) { + fail("SerializeWrite and SerDe serialization does not match"); + } + serdeBytes[i] = serialized.copyBytes(); + } + + // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. 
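+ // Round trip in the other direction: the text produced by LazySimpleSerDe should be readable by LazySimpleDeserializeRead and yield the same primitive values.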
+ for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(primitiveTypeInfos, + separator, serdeParams[i]); + + byte[] bytes = serdeBytes[i]; + lazySimpleDeserializeRead.set(bytes, 0, bytes.length); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object); + } + lazySimpleDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned()); + } + } + + private Properties createProperties(String fieldNames, String fieldTypes) { + Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); + + tbl.setProperty("columns", fieldNames); + tbl.setProperty("columns.types", fieldTypes); + + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + + return tbl; + } + + private LazySimpleSerDe getSerDe(String fieldNames, String fieldTypes) throws SerDeException { + // Create the SerDe + LazySimpleSerDe serDe = new LazySimpleSerDe(); + Configuration conf = new Configuration(); + Properties tbl = createProperties(fieldNames, fieldTypes); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); + return serDe; + } + + private LazySerDeParameters getSerDeParams(String fieldNames, String fieldTypes) throws SerDeException { + Configuration conf = new Configuration(); + Properties tbl = createProperties(fieldNames, fieldTypes); + return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); + } + + public void testLazySimpleFast() throws Throwable { + try { + + int num = 1000; + Random r = new Random(1234); + MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num]; + PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][]; + for (int i = 0; i < num; i++) { + int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); + MyTestPrimitiveClass t = new MyTestPrimitiveClass(); + int field = 0; + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, randField, field, extraTypeInfo); + PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); + rows[i] = t; + primitiveTypeInfosArray[i] = primitiveTypeInfos; + } + + // To get the specific type information for CHAR and VARCHAR, seems like we need an + // inspector and SerDe per row... 
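+ // The CHAR/VARCHAR lengths are chosen randomly per row, so the column type strings differ between rows; each row therefore gets its own inspector, SerDe and LazySerDeParameters.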
+ StructObjectInspector[] rowOIs = new StructObjectInspector[num]; + LazySimpleSerDe[] serdes = new LazySimpleSerDe[num]; + LazySerDeParameters[] serdeParams = new LazySerDeParameters[num]; + for (int i = 0; i < num; i++) { + MyTestPrimitiveClass t = rows[i]; + + StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]); + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + + rowOIs[i] = rowOI; + serdes[i] = getSerDe(fieldNames, fieldTypes); + serdeParams[i] = getSerDeParams(fieldNames, fieldTypes); + } + + byte separator = (byte) '\t'; + testLazySimpleFast(rows, serdes, rowOIs, separator, serdeParams, primitiveTypeInfosArray); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java index b6467ef..10549b9 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java @@ -18,49 +18,119 @@ package org.apache.hadoop.hive.serde2.lazybinary; import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; +import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; /** * MyTestClassBigger. * */ public class MyTestClassBigger { - Byte myByte; - Short myShort; - Integer myInt; - Long myLong; - Float myFloat; - Double myDouble; - String myString; - HiveDecimal myDecimal; - Date myDate; - MyTestInnerStruct myStruct; - List myList; - byte[] myBA; + + // The primitives. + public Boolean myBool; + public Byte myByte; + public Short myShort; + public Integer myInt; + public Long myLong; + public Float myFloat; + public Double myDouble; + public String myString; + public HiveChar myHiveChar; + public HiveVarchar myHiveVarchar; + public byte[] myBinary; + public HiveDecimal myDecimal; + public Date myDate; + public Timestamp myTimestamp; + public HiveIntervalYearMonth myIntervalYearMonth; + public HiveIntervalDayTime myIntervalDayTime; + + + // Add more complex types. + public MyTestInnerStruct myStruct; + public List myList; + + // Bigger addition. 
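+ // The map below is the extra column relative to MyTestClass; its index is kept in mapPos so the map tests do not hard-code the field position.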
Map> myMap; + public final static int mapPos = 18; + public MyTestClassBigger() { } - public MyTestClassBigger(Byte b, Short s, Integer i, Long l, Float f, - Double d, String st, HiveDecimal bd, Date date, MyTestInnerStruct is, List li, - byte[] ba, Map> mp) { - myByte = b; - myShort = s; - myInt = i; - myLong = l; - myFloat = f; - myDouble = d; - myString = st; - myDecimal = bd; - myDate = date; - myStruct = is; - myList = li; - myBA = ba; - myMap = mp; + public final static int biggerCount = 19; + + public int randomFill(Random r, ExtraTypeInfo extraTypeInfo) { + int randField = r.nextInt(biggerCount); + int field = 0; + myBool = (randField == field++) ? null : (r.nextInt(1) == 1); + myByte = (randField == field++) ? null : Byte.valueOf((byte) r.nextInt()); + myShort = (randField == field++) ? null : Short.valueOf((short) r.nextInt()); + myInt = (randField == field++) ? null : Integer.valueOf(r.nextInt()); + myLong = (randField == field++) ? null : Long.valueOf(r.nextLong()); + myFloat = (randField == field++) ? null : Float + .valueOf(r.nextFloat() * 10 - 5); + myDouble = (randField == field++) ? null : Double + .valueOf(r.nextDouble() * 10 - 5); + myString = (randField == field++) ? null : MyTestPrimitiveClass.getRandString(r); + myHiveChar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveChar(r, extraTypeInfo); + myHiveVarchar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveVarchar(r, extraTypeInfo); + myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000)); + myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo); + myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r); + myTimestamp = (randField == field++) ? null : MyTestPrimitiveClass.getRandTimestamp(r); + myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r); + myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r); + + myStruct = (randField == field++) ? null : new MyTestInnerStruct( + r.nextInt(5) - 2, r.nextInt(5) - 2); + myList = (randField == field++) ? null : MyTestClass.getRandIntegerArray(r); + + Map> mp = new HashMap>(); + String key = MyTestPrimitiveClass.getRandString(r); + List value = randField > 9 ? null + : getRandStructArray(r); + mp.put(key, value); + String key1 = MyTestPrimitiveClass.getRandString(r); + mp.put(key1, null); + String key2 = MyTestPrimitiveClass.getRandString(r); + List value2 = getRandStructArray(r); + mp.put(key2, value2); + myMap = mp; + return field; } + + /** + * Generate a random struct array. 
+ * + * @param r + * random number generator + * @return an struct array + */ + static List getRandStructArray(Random r) { + int length = r.nextInt(10); + ArrayList result = new ArrayList( + length); + for (int i = 0; i < length; i++) { + MyTestInnerStruct ti = new MyTestInnerStruct(r.nextInt(), r.nextInt()); + result.add(ti); + } + return result; + } + } diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java index 8c7ffba..dabfb74 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java @@ -18,36 +18,68 @@ package org.apache.hadoop.hive.serde2.lazybinary; import java.sql.Date; +import java.sql.Timestamp; +import java.util.Random; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; public class MyTestClassSmaller { - Byte myByte; - Short myShort; - Integer myInt; - Long myLong; - Float myFloat; - Double myDouble; - String myString; - HiveDecimal myDecimal; - Date myDate; + + public Boolean myBool; + public Byte myByte; + public Short myShort; + public Integer myInt; + public Long myLong; + public Float myFloat; + public Double myDouble; + public String myString; + public HiveChar myHiveChar; + public HiveVarchar myHiveVarchar; + public byte[] myBinary; + public HiveDecimal myDecimal; + public Date myDate; + public Timestamp myTimestamp; + public HiveIntervalYearMonth myIntervalYearMonth; + public HiveIntervalDayTime myIntervalDayTime; + MyTestInnerStruct myStruct; - public MyTestClassSmaller() { - } + public final static int smallerCount = 17; - public MyTestClassSmaller(Byte b, Short s, Integer i, Long l, Float f, - Double d, String st, HiveDecimal bd, Date date, MyTestInnerStruct is) { - myByte = b; - myShort = s; - myInt = i; - myLong = l; - myFloat = f; - myDouble = d; - myString = st; - myDecimal = bd; - myDate = date; - myStruct = is; + public int randomFill(Random r, ExtraTypeInfo extraTypeInfo) { + int randField = r.nextInt(smallerCount); + int field = 0; + + myBool = (randField == field++) ? null : (r.nextInt(1) == 1); + myByte = (randField == field++) ? null : Byte.valueOf((byte) r.nextInt()); + myShort = (randField == field++) ? null : Short.valueOf((short) r.nextInt()); + myInt = (randField == field++) ? null : Integer.valueOf(r.nextInt()); + myLong = (randField == field++) ? null : Long.valueOf(r.nextLong()); + myFloat = (randField == field++) ? null : Float + .valueOf(r.nextFloat() * 10 - 5); + myDouble = (randField == field++) ? null : Double + .valueOf(r.nextDouble() * 10 - 5); + myString = (randField == field++) ? null : MyTestPrimitiveClass.getRandString(r); + myHiveChar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveChar(r, extraTypeInfo); + myHiveVarchar = (randField == field++) ? 
null : MyTestPrimitiveClass.getRandHiveVarchar(r, extraTypeInfo); + myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000)); + myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo); + myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r); + myTimestamp = (randField == field++) ? null : MyTestPrimitiveClass.getRandTimestamp(r); + myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r); + myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r); + + myStruct = (randField == field++) ? null : new MyTestInnerStruct( + r.nextInt(5) - 2, r.nextInt(5) - 2); + return field; } + } diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java new file mode 100644 index 0000000..a169586 --- /dev/null +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java @@ -0,0 +1,205 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; + +public class TestLazyBinaryFast extends TestCase { + + private void testLazyBinaryFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, SerDe[] serdes, StructObjectInspector[] rowOIs, + PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable { + + LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(MyTestPrimitiveClass.primitiveCount); + + // Try to serialize + BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + Output output = new Output(); + lazyBinarySerializeWrite.set(output); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveCategory, object); + } + + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(output.getData(), 0, output.getLength()); + serializeWriteBytes[i] = bytesWritable; + } + + // Try to deserialize + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(primitiveTypeInfos); + + BytesWritable bytesWritable = serializeWriteBytes[i]; + lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object); + } + 
lazyBinaryDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); + } + + // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + BytesWritable bytesWritable = serializeWriteBytes[i]; + LazyBinaryStruct lazyBinaryStruct = (LazyBinaryStruct) serdes[i].deserialize(bytesWritable); + + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo); + Object object = lazyBinaryStruct.getField(index); + if (expected == null || object == null) { + if (expected != null || object != null) { + fail("SerDe deserialized NULL column mismatch"); + } + } else { + if (!object.equals(expected)) { + fail("SerDe deserialized value does not match"); + } + } + } + } + + // One Writable per row. + BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + + // Serialize using the SerDe, then below deserialize using DeserializeRead. + Object[] row = new Object[MyTestPrimitiveClass.primitiveCount]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + + // LazyBinary seems to work better with an row object array instead of a Java object... + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); + row[index] = object; + } + + BytesWritable serialized = (BytesWritable) serdes[i].serialize(row, rowOIs[i]); + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(serialized); + byte[] bytes1 = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); + if (!Arrays.equals(bytes1, bytes2)) { + fail("SerializeWrite and SerDe serialization does not match"); + } + serdeBytes[i] = bytesWritable; + } + + // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. 
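+ // The bytes written by LazyBinarySerDe are fed back through LazyBinaryDeserializeRead to confirm both serialization paths produce data the fast reader can consume.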
+ for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(primitiveTypeInfos); + + BytesWritable bytesWritable = serdeBytes[i]; + lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object); + } + lazyBinaryDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); + } + } + + public void testLazyBinaryFast() throws Throwable { + try { + + int num = 1000; + Random r = new Random(1234); + MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num]; + PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][]; + for (int i = 0; i < num; i++) { + int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); + MyTestPrimitiveClass t = new MyTestPrimitiveClass(); + int field = 0; + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, randField, field, extraTypeInfo); + PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); + rows[i] = t; + primitiveTypeInfosArray[i] = primitiveTypeInfos; + } + + // To get the specific type information for CHAR and VARCHAR, seems like we need an + // inspector and SerDe per row... 
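+ // As in the LazySimple test, the CHAR/VARCHAR type parameters vary per row, so a matching inspector and SerDe are built for each row.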
+ StructObjectInspector[] rowOIs = new StructObjectInspector[num]; + SerDe[] serdes = new SerDe[num]; + for (int i = 0; i < num; i++) { + MyTestPrimitiveClass t = rows[i]; + + StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]); + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + + rowOIs[i] = rowOI; + serdes[i] = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes); + } + + testLazyBinaryFast(rows, serdes, rowOIs, primitiveTypeInfosArray); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java index 02ae6f8..e54db95 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java @@ -36,7 +36,9 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyBinary; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; @@ -91,7 +93,7 @@ * @return the initialized LazyBinarySerDe * @throws Throwable */ - private SerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable { + protected static SerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable { Properties schema = new Properties(); schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames); schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); @@ -194,46 +196,20 @@ private void testShorterSchemaDeserialization(Random r) throws Throwable { int num = 100; for (int itest = 0; itest < num; itest++) { - int randField = r.nextInt(11); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, itest); - Map> mp = new HashMap>(); - String key = TestBinarySortableSerDe.getRandString(r); - List value = randField > 9 ? 
null - : getRandStructArray(r); - mp.put(key, value); - String key1 = TestBinarySortableSerDe.getRandString(r); - mp.put(key1, null); - String key2 = TestBinarySortableSerDe.getRandString(r); - List value2 = getRandStructArray(r); - mp.put(key2, value2); - - MyTestClassBigger input = new MyTestClassBigger(b, s, n, l, f, d, st, bd, date, is, - li, ba, mp); - BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); + MyTestClassBigger t = new MyTestClassBigger(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); + BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1); Object output = serde2.deserialize(bw); - if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { + if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) { System.out.println("structs = " - + SerDeUtils.getJSONString(input, rowOI1)); + + SerDeUtils.getJSONString(t, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); - assertEquals(input, output); + assertEquals(t, output); } } } @@ -263,34 +239,20 @@ private void testShorterSchemaDeserialization1(Random r) throws Throwable { int num = 100; for (int itest = 0; itest < num; itest++) { - int randField = r.nextInt(12); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, itest); - MyTestClass input = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li, ba); - BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); + MyTestClass t = new MyTestClass(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); + BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1); Object output = serde2.deserialize(bw); - if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { + if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) { System.out.println("structs = " - + SerDeUtils.getJSONString(input, rowOI1)); + + SerDeUtils.getJSONString(t, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); - assertEquals(input, output); + assertEquals(t, output); } } } @@ -320,34 +282,21 @@ void testLongerSchemaDeserialization(Random r) throws Throwable { int num = 100; for (int itest = 0; itest < num; itest++) { - int randField = r.nextInt(12); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? 
null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, itest); - MyTestClass input = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li,ba); - BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); + MyTestClass t = new MyTestClass(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); + + BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1); Object output = serde2.deserialize(bw); - if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { + if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) { System.out.println("structs = " - + SerDeUtils.getJSONString(input, rowOI1)); + + SerDeUtils.getJSONString(t, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); - assertEquals(input, output); + assertEquals(t, output); } } } @@ -377,33 +326,20 @@ void testLongerSchemaDeserialization1(Random r) throws Throwable { int num = 100; for (int itest = 0; itest < num; itest++) { - int randField = r.nextInt(9); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 7 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 7 ? 
null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - - MyTestClassSmaller input = new MyTestClassSmaller(b, s, n, l, f, d, st, bd, date, - is); - BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); + MyTestClassSmaller t = new MyTestClassSmaller(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); + BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1); Object output = serde2.deserialize(bw); - if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { + if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) { System.out.println("structs = " - + SerDeUtils.getJSONString(input, rowOI1)); + + SerDeUtils.getJSONString(t, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); - assertEquals(input, output); + assertEquals(t, output); } } } @@ -421,13 +357,13 @@ void testLazyBinaryMap(Random r) throws Throwable { StructObjectInspector soi1 = (StructObjectInspector) serdeOI; List fields1 = soi1.getAllStructFieldRefs(); LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1 - .get(12).getFieldObjectInspector(); + .get(MyTestClassBigger.mapPos).getFieldObjectInspector(); ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector(); ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector(); StructObjectInspector soi2 = rowOI; List fields2 = soi2.getAllStructFieldRefs(); - MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(12) + MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(MyTestClassBigger.mapPos) .getFieldObjectInspector(); ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector(); ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector(); @@ -439,18 +375,19 @@ void testLazyBinaryMap(Random r) throws Throwable { int randFields = r.nextInt(10); for (int i = 0; i < randFields; i++) { - String key = TestBinarySortableSerDe.getRandString(r); + String key = MyTestPrimitiveClass.getRandString(r); int randField = r.nextInt(10); List value = randField > 4 ? null : getRandStructArray(r); mp.put(key, value); + } - MyTestClassBigger input = new MyTestClassBigger(null, null, null, null, - null, null, null, null, null, null, null, null, mp); - BytesWritable bw = (BytesWritable) serde.serialize(input, rowOI); + MyTestClassBigger t = new MyTestClassBigger(); + t.myMap = mp; + BytesWritable bw = (BytesWritable) serde.serialize(t, rowOI); Object output = serde.deserialize(bw); - Object lazyobj = soi1.getStructFieldData(output, fields1.get(12)); + Object lazyobj = soi1.getStructFieldData(output, fields1.get(MyTestClassBigger.mapPos)); Map outputmp = lazympoi.getMap(lazyobj); if (outputmp.size() != mp.size()) { @@ -497,23 +434,9 @@ public void testLazyBinarySerDe() throws Throwable { Random r = new Random(1234); MyTestClass rows[] = new MyTestClass[num]; for (int i = 0; i < num; i++) { - int randField = r.nextInt(12); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? 
null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, i); - MyTestClass t = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li, ba); + MyTestClass t = new MyTestClass(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); rows[i] = t; }