diff --git common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java index 84bfcbe..55cb693 100644 --- common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java +++ common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java @@ -90,7 +90,7 @@ * @serial * @see #getUnscaledValue() */ - private final UnsignedInt128 unscaledValue; + private UnsignedInt128 unscaledValue; /** * The scale of this Decimal128, as returned by {@link #getScale()}. Unlike @@ -731,8 +731,7 @@ public Decimal128 update(char[] str, int offset, int length, short scale) { /** * Serializes the value in a format compatible with the BigDecimal's own representation - * @param bytes - * @param offset + * @param scratch */ public int fastSerializeForHiveDecimal( Decimal128FastBuffer scratch) { return this.unscaledValue.fastSerializeForHiveDecimal(scratch, this.signum); @@ -1502,7 +1501,7 @@ public int compareTo(Decimal128 val) { cmp = this.unscaledValue.compareToScaleTen(val.unscaledValue, (short) (this.scale - val.scale)); } else { - cmp = val.unscaledValue.compareToScaleTen(this.unscaledValue, + cmp = - val.unscaledValue.compareToScaleTen(this.unscaledValue, (short) (val.scale - this.scale)); } return cmp * this.signum; @@ -1927,5 +1926,26 @@ public Decimal128 fastUpdateFromInternalStorage(byte[] internalStorage, short sc return this; } + + /** + * This setter is only for de-serialization, should not be used otherwise. + */ + public void setUnscaledValue(UnsignedInt128 unscaledValue) { + this.unscaledValue = unscaledValue; + } + + /** + * This setter is only for de-serialization, should not be used otherwise. + */ + public void setScale(short scale) { + this.scale = scale; + } + + /** + * This setter is only for de-serialization, should not be used otherwise. + */ + public void setSignum(byte signum) { + this.signum = signum; + } } diff --git common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java index 74168bd..34bd9d0 100644 --- common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java +++ common/src/java/org/apache/hadoop/hive/common/type/UnsignedInt128.java @@ -15,6 +15,7 @@ */ package org.apache.hadoop.hive.common.type; +import java.io.Serializable; import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.IntBuffer; @@ -35,7 +36,7 @@ * SQL (e.g., exact POWER/SQRT). * */ -public final class UnsignedInt128 implements Comparable { +public final class UnsignedInt128 implements Comparable, Serializable { /** Number of ints to store this object. */ public static final int INT_COUNT = 4; @@ -61,7 +62,7 @@ * Int32 elements as little-endian (v[0] is least significant) unsigned * integers. */ - private final int[] v = new int[INT_COUNT]; + private int[] v = new int[INT_COUNT]; /** * Number of leading non-zero elements in {@link #v}. For example, if the @@ -70,7 +71,7 @@ * * @see #updateCount() */ - private transient byte count; + private byte count; /** * Determines the number of ints to store one value. @@ -2418,7 +2419,7 @@ private void multiplyDestructiveFitsInt32(UnsignedInt128 right, } } - /** Updates the value of {@link #cnt} by checking {@link #v}. */ + /** Updates the value of {@link #count} by checking {@link #v}. */ private void updateCount() { if (v[3] != 0) { this.count = (byte) 4; @@ -2634,4 +2635,30 @@ private int fastUpdateIntFromInternalStorage(byte[] internalStorage, } return value; } + + public int[] getV() { + return v; + } + + /** + * This setter is only for de-serialization, should not be used otherwise. + */ + public void setV(int [] v) { + this.v[0] = v[0]; + this.v[1] = v[1]; + this.v[2] = v[2]; + this.v[3] = v[3]; + updateCount(); + } + + public byte getCount() { + return count; + } + + /** + * This setter is only for de-serialization, should not be used otherwise. + */ + public void setCount(byte count) { + this.count = count; + } } diff --git common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java index 91bb8b5..debc270 100644 --- common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java +++ common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java @@ -119,6 +119,16 @@ public void testCompareTo() { Decimal128 d3 = new Decimal128(2.0d / 3.0d, (short) 5); Decimal128 d4 = new Decimal128(2.0d / 3.0d, (short) 8); assertTrue(d3.compareTo(d4) != 0); + + Decimal128 d5 = new Decimal128(12, (short) 5); + Decimal128 d6 = new Decimal128(15, (short) 7); + assertTrue(d5.compareTo(d6) < 0); + assertTrue(d6.compareTo(d5) > 0); + + Decimal128 d7 = new Decimal128(15, (short) 5); + Decimal128 d8 = new Decimal128(12, (short) 7); + assertTrue(d7.compareTo(d8) > 0); + assertTrue(d8.compareTo(d7) < 0); } @Test diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java index edbbbec..c6a7c00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java @@ -20,7 +20,9 @@ import java.io.IOException; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; @@ -43,6 +45,10 @@ private static final long serialVersionUID = 1L; + protected transient Object[] singleRow; + + protected transient VectorExpressionWriter[] valueWriters; + public VectorFileSinkOperator(VectorizationContext context, OperatorDesc conf) { super(); @@ -54,35 +60,44 @@ public VectorFileSinkOperator() { } @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + valueWriters = VectorExpressionWriterFactory.getExpressionWriters( + (StructObjectInspector) inputObjInspectors[0]); + singleRow = new Object[valueWriters.length]; + } + + @Override public void processOp(Object data, int tag) throws HiveException { VectorizedRowBatch vrg = (VectorizedRowBatch)data; Writable [] records = null; boolean vectorizedSerde = false; - int outputIterations = 1; try { if (serializer instanceof VectorizedSerde) { recordValue = ((VectorizedSerde) serializer).serializeVector(vrg, inputObjInspectors[0]); records = (Writable[]) ((ObjectWritable) recordValue).get(); vectorizedSerde = true; - outputIterations = vrg.size; } } catch (SerDeException e1) { throw new HiveException(e1); } - for (int i = 0; i < outputIterations; i++) { + for (int i = 0; i < vrg.size; i++) { Writable row = null; if (vectorizedSerde) { row = records[i]; } else { if (vrg.valueWriters == null) { - vrg.setValueWriters(VectorExpressionWriterFactory.getExpressionWriters( - (StructObjectInspector)inputObjInspectors[0])); + vrg.setValueWriters(this.valueWriters); + } + try { + row = serializer.serialize(getRowObject(vrg, i), inputObjInspectors[0]); + } catch (SerDeException ex) { + throw new HiveException(ex); } - row = new Text(vrg.toString()); } /* Create list bucketing sub-directory only if stored-as-directories is on. */ String lbDirName = null; @@ -159,4 +174,17 @@ public void processOp(Object data, int tag) throws HiveException { } } } + + private Object[] getRowObject(VectorizedRowBatch vrg, int rowIndex) + throws HiveException { + int batchIndex = rowIndex; + if (vrg.selectedInUse) { + batchIndex = vrg.selected[rowIndex]; + } + for (int i = 0; i < vrg.projectionSize; i++) { + ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]]; + singleRow[i] = vrg.valueWriters[i].writeValue(vectorColumn, batchIndex); + } + return singleRow; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index 868f13e..886154d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -1045,7 +1045,7 @@ public VectorExpressionWriterSetter init( @Override public Object writeValue(ColumnVector column, int row) throws HiveException { - throw new HiveException("Should never reach here"); + return baseWriter.writeValue(column, row); } @Override diff --git ql/src/test/queries/clientpositive/vector_decimal_expressions.q ql/src/test/queries/clientpositive/vector_decimal_expressions.q index f3b4c83..79c0e3e 100644 --- ql/src/test/queries/clientpositive/vector_decimal_expressions.q +++ ql/src/test/queries/clientpositive/vector_decimal_expressions.q @@ -1,4 +1,5 @@ CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2 FROM alltypesorc; -SET hive.vectorized.execution.enabled=true; -EXPLAIN SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)) from decimal_test where cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdouble IS NOT NULL LIMIT 10; -SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)) from decimal_test where cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdouble IS NOT NULL LIMIT 10; +SET hive.vectorized.execution.enabled=false; +EXPLAIN SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)), cdecimal1 % 10, CAST(cdecimal1 AS INT), CAST(cdecimal2 AS SMALLINT), CAST(cdecimal2 AS TINYINT), CAST(cdecimal1 AS BIGINT), CAST (cdecimal1 AS BOOLEAN), CAST(cdecimal2 AS DOUBLE), CAST(cdecimal1 AS FLOAT) FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL LIMIT 10; + +SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)), cdecimal1 % 10, CAST(cdecimal1 AS INT), CAST(cdecimal2 AS SMALLINT), CAST(cdecimal2 AS TINYINT), CAST(cdecimal1 AS BIGINT), CAST (cdecimal1 AS BOOLEAN), CAST(cdecimal2 AS DOUBLE), CAST(cdecimal1 AS FLOAT) FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL LIMIT 10; diff --git ql/src/test/results/clientpositive/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/vector_decimal_expressions.q.out index f0eea5d..629f5d5 100644 --- ql/src/test/results/clientpositive/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/vector_decimal_expressions.q.out @@ -5,9 +5,9 @@ POSTHOOK: query: CREATE TABLE decimal_test STORED AS ORC AS SELECT cdouble, CAST POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@decimal_test -PREHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)) from decimal_test where cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)), cdecimal1 % 10, CAST(cdecimal1 AS INT), CAST(cdecimal2 AS SMALLINT), CAST(cdecimal2 AS TINYINT), CAST(cdecimal1 AS BIGINT), CAST (cdecimal1 AS BOOLEAN), CAST(cdecimal2 AS DOUBLE), CAST(cdecimal1 AS FLOAT) FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)) from decimal_test where cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: EXPLAIN SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)), cdecimal1 % 10, CAST(cdecimal1 AS INT), CAST(cdecimal2 AS SMALLINT), CAST(cdecimal2 AS TINYINT), CAST(cdecimal1 AS BIGINT), CAST (cdecimal1 AS BOOLEAN), CAST(cdecimal2 AS DOUBLE), CAST(cdecimal1 AS FLOAT) FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -21,12 +21,12 @@ STAGE PLANS: alias: decimal_test Statistics: Num rows: 12288 Data size: 2128368 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((cdecimal1 > 0) and (cdecimal1 < 12345.5678)) and (cdecimal2 <> 0)) and cdouble is not null) (type: boolean) - Statistics: Num rows: 683 Data size: 118300 Basic stats: COMPLETE Column stats: NONE + predicate: (((((cdecimal1 > 0) and (cdecimal1 < 12345.5678)) and (cdecimal2 <> 0)) and (cdecimal2 > 1000)) and cdouble is not null) (type: boolean) + Statistics: Num rows: 228 Data size: 39491 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (cdecimal1 + cdecimal2) (type: decimal(25,14)), (cdecimal1 - (2 * cdecimal2)) (type: decimal(26,14)), ((cdecimal1 + 2.34) / cdecimal2) (type: double), (cdecimal1 * (cdecimal2 / 3.4)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 683 Data size: 118300 Basic stats: COMPLETE Column stats: NONE + expressions: (cdecimal1 + cdecimal2) (type: decimal(25,14)), (cdecimal1 - (2 * cdecimal2)) (type: decimal(26,14)), ((cdecimal1 + 2.34) / cdecimal2) (type: double), (cdecimal1 * (cdecimal2 / 3.4)) (type: double), (cdecimal1 % 10) (type: decimal(12,10)), UDFToInteger(cdecimal1) (type: int), UDFToShort(cdecimal2) (type: smallint), UDFToByte(cdecimal2) (type: tinyint), UDFToLong(cdecimal1) (type: bigint), UDFToBoolean(cdecimal1) (type: boolean), UDFToDouble(cdecimal2) (type: double), UDFToFloat(cdecimal1) (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 228 Data size: 39491 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 1730 Basic stats: COMPLETE Column stats: NONE @@ -43,21 +43,21 @@ STAGE PLANS: Fetch Operator limit: 10 -PREHOOK: query: SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)) from decimal_test where cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: query: SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)), cdecimal1 % 10, CAST(cdecimal1 AS INT), CAST(cdecimal2 AS SMALLINT), CAST(cdecimal2 AS TINYINT), CAST(cdecimal1 AS BIGINT), CAST (cdecimal1 AS BOOLEAN), CAST(cdecimal2 AS DOUBLE), CAST(cdecimal1 AS FLOAT) FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@decimal_test #### A masked pattern was here #### -POSTHOOK: query: SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)) from decimal_test where cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: query: SELECT cdecimal1 + cdecimal2, cdecimal1 - (2*cdecimal2), ((cdecimal1+2.34)/cdecimal2), (cdecimal1 * (cdecimal2/3.4)), cdecimal1 % 10, CAST(cdecimal1 AS INT), CAST(cdecimal2 AS SMALLINT), CAST(cdecimal2 AS TINYINT), CAST(cdecimal1 AS BIGINT), CAST (cdecimal1 AS BOOLEAN), CAST(cdecimal2 AS DOUBLE), CAST(cdecimal1 AS FLOAT) FROM decimal_test WHERE cdecimal1 > 0 AND cdecimal1 < 12345.5678 AND cdecimal2 != 0 AND cdecimal2 > 1000 AND cdouble IS NOT NULL LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_test #### A masked pattern was here #### -19699.417463617423 -12507.913305613346 0.8351496686995997 2.8303425077026896E7 -9216.339708939685 -5851.80644490647 0.8353975893550668 6195112.1797296945 -6514.8403326403464 -4136.5212058211928 0.8355907765708067 3095563.9418919063 -7587.301455301477 -4817.467775467754 0.8354976172734904 4198623.24324327 -19197.972972973 -12189.527027027 0.835155361813429 2.6880848817567654E7 -17098.9945945946 -10856.8054054054 0.8351828165813104 2.132423090270272E7 -12433.723076923077 -7894.646153846154 0.8352770361086894 1.12754688E7 -13.12681912684615 -8.3347193346923 1.162028480096775 12.567567567624428 -7247.316839916862 -4601.598544698524 0.8355241651897876 3830775.6932432684 -14757.1700623700465 -9369.891476091493 0.8352226654922171 1.5883214124324286E7 +19699.417463617423 -12507.913305613346 0.8351496686995997 2.8303425077026896E7 3.6405405405 8963 10735 -17 8963 true 10735.776923076923 8963.641 +9216.339708939685 -5851.80644490647 0.8353975893550668 6195112.1797296945 3.6243243243 4193 5022 -98 4193 true 5022.715384615385 4193.6245 +6514.8403326403464 -4136.5212058211928 0.8355907765708067 3095563.9418919063 4.3864864865 2964 3550 -34 2964 true 3550.4538461538464 2964.3865 +7587.301455301477 -4817.467775467754 0.8354976172734904 4198623.24324327 2.3783783784 3452 4134 38 3452 true 4134.923076923077 3452.3784 +19197.972972973 -12189.527027027 0.835155361813429 2.6880848817567654E7 5.472972973 8735 10462 -34 8735 true 10462.5 8735.473 +17098.9945945946 -10856.8054054054 0.8351828165813104 2.132423090270272E7 0.3945945946 7780 9318 102 7780 true 9318.6 7780.3945 +12433.723076923077 -7894.646153846154 0.8352770361086894 1.12754688E7 7.6 5657 6776 120 5657 true 6776.123076923077 5657.6 +7247.316839916862 -4601.598544698524 0.8355241651897876 3830775.6932432684 7.6783783784 3297 3949 109 3297 true 3949.638461538462 3297.6785 +14757.1700623700465 -9369.891476091493 0.8352226654922171 1.5883214124324286E7 4.8162162162 6714 8042 106 6714 true 8042.3538461538465 6714.8164 +10964.832016631993 -6961.991060291086 0.8353232978714221 8768719.779729689 9.2243243243 4989 5975 87 4989 true 5975.607692307693 4989.224