diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java index 729908a..66e8a52 100644 --- common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java +++ common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java @@ -266,4 +266,12 @@ public static BigDecimal enforcePrecisionScale(BigDecimal bd, int maxPrecision, return bd; } + /** + * Sets the {@link BigDecimal} value in this object. + * @param bigDecimal + */ + public void setNormalize(BigDecimal bigDecimal) { + BigDecimal value = normalize(bigDecimal, true); + this.bd = value; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index f513188..6e79979 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileSplit; @@ -233,7 +234,8 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; // Vectorization currently only supports the following data types: - // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING and TIMESTAMP + // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, + // DATE and DECIMAL switch (poi.getPrimitiveCategory()) { case BOOLEAN: case BYTE: @@ -241,6 +243,7 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException case INT: case LONG: case TIMESTAMP: + case DATE: result.cols[j] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); break; case FLOAT: @@ -250,6 +253,11 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException case STRING: result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); break; + case DECIMAL: + DecimalTypeInfo tInfo = (DecimalTypeInfo) poi.getTypeInfo(); + result.cols[j] = new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + tInfo.precision(), tInfo.scale()); + break; default: throw new RuntimeException("Vectorizaton is not supported for datatype:" + poi.getPrimitiveCategory()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java index e5c3aa4..be5cea8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.type.Decimal128; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -33,6 +34,7 @@ Object writeValue(long value) throws HiveException; Object writeValue(double value) throws HiveException; Object writeValue(byte[] value, int start, int length) throws HiveException; + Object writeValue(Decimal128 value) throws HiveException; Object setValue(Object row, ColumnVector column, int columnRow) throws HiveException; Object initValue(Object ost) throws HiveException; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index a242fef..868f13e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.math.BigDecimal; +import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; @@ -25,18 +27,13 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.serde2.io.ByteWritable; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.io.*; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -143,6 +140,21 @@ public Object writeValue(byte[] value, int start, int length) throws HiveExcepti public Object setValue(Object field, byte[] value, int start, int length) throws HiveException { throw new HiveException("Internal error: should not reach here"); } + + /** + * The base implementation must be overridden by the Decimal specialization + */ + @Override + public Object writeValue(Decimal128 value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the Decimal specialization + */ + public Object setValue(Object field, Decimal128 value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } } /** @@ -272,7 +284,7 @@ public Object writeValue(ColumnVector column, int row) throws HiveException { "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", row, bcv.noNulls, bcv.isRepeating, bcv.isNull[row], bcv.isNull[0])); } - + @Override public Object setValue(Object field, ColumnVector column, int row) throws HiveException { BytesColumnVector bcv = (BytesColumnVector) column; @@ -294,7 +306,58 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", row, bcv.noNulls, bcv.isRepeating, bcv.isNull[row], bcv.isNull[0])); } - } + } + + + /** + * Specialized writer for DecimalColumnVector. Will throw cast exception + * if the wrong vector column is used. + */ + private static abstract class VectorExpressionWriterDecimal extends VectorExpressionWriterBase { + @Override + public Object writeValue(ColumnVector column, int row) throws HiveException { + DecimalColumnVector dcv = (DecimalColumnVector) column; + if (dcv.noNulls && !dcv.isRepeating) { + return writeValue(dcv.vector[row]); + } else if (dcv.noNulls && dcv.isRepeating) { + return writeValue(dcv.vector[0]); + } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) { + return writeValue(dcv.vector[row]); + } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) { + return writeValue(dcv.vector[0]); + } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) { + return null; + } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) { + return null; + } + throw new HiveException( + String.format( + "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", + row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0])); + } + + @Override + public Object setValue(Object field, ColumnVector column, int row) throws HiveException { + DecimalColumnVector dcv = (DecimalColumnVector) column; + if (dcv.noNulls && !dcv.isRepeating) { + return setValue(field, dcv.vector[row]); + } else if (dcv.noNulls && dcv.isRepeating) { + return setValue(field, dcv.vector[0]); + } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) { + return setValue(field, dcv.vector[row]); + } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) { + return null; + } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) { + return setValue(field, dcv.vector[0]); + } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) { + return null; + } + throw new HiveException( + String.format( + "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", + row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0])); + } + } /** * Compiles the appropriate vector expression writer based on an expression info (ExprNodeDesc) @@ -381,17 +444,78 @@ public static VectorExpressionWriter genVectorExpressionWritable( } private static VectorExpressionWriter genVectorExpressionWritableDecimal( - SettableHiveDecimalObjectInspector fieldObjInspector) throws HiveException { - - // We should never reach this, the compile validation should guard us - throw new HiveException("DECIMAL primitive type not supported in vectorization."); - } + SettableHiveDecimalObjectInspector fieldObjInspector) throws HiveException { + + return new VectorExpressionWriterDecimal() { + private HiveDecimal hd; + private Object obj; + + public VectorExpressionWriter init(SettableHiveDecimalObjectInspector objInspector) throws HiveException { + super.init(objInspector); + hd = HiveDecimal.create(BigDecimal.ZERO); + obj = initValue(null); + return this; + } + + @Override + public Object writeValue(Decimal128 value) throws HiveException { + hd.setNormalize(value.toBigDecimal()); + ((SettableHiveDecimalObjectInspector) this.objectInspector).set(obj, hd); + return obj; + } + + @Override + public Object setValue(Object field, Decimal128 value) { + hd.setNormalize(value.toBigDecimal()); + ((SettableHiveDecimalObjectInspector) this.objectInspector).set(field, hd); + return field; + } + + @Override + public Object initValue(Object ignored) throws HiveException { + return ((SettableHiveDecimalObjectInspector) this.objectInspector).create( + HiveDecimal.create(BigDecimal.ZERO)); + } + }.init(fieldObjInspector); + } private static VectorExpressionWriter genVectorExpressionWritableDate( - SettableDateObjectInspector fieldObjInspector) throws HiveException { - // We should never reach this, the compile validation should guard us - throw new HiveException("DATE primitive type not supported in vectorization."); - } + SettableDateObjectInspector fieldObjInspector) throws HiveException { + return new VectorExpressionWriterLong() { + private Date dt; + private Object obj; + + public VectorExpressionWriter init(SettableDateObjectInspector objInspector) throws HiveException { + super.init(objInspector); + dt = new Date(0); + obj = initValue(null); + return this; + } + + @Override + public Object writeValue(long value) { + dt.setTime(DateWritable.daysToMillis((int) value)); + ((SettableDateObjectInspector) this.objectInspector).set(obj, dt); + return obj; + } + + @Override + public Object setValue(Object field, long value) { + if (null == field) { + field = initValue(null); + } + dt.setTime(DateWritable.daysToMillis((int) value)); + ((SettableDateObjectInspector) this.objectInspector).set(field, dt); + return field; + } + + @Override + public Object initValue(Object ignored) { + return ((SettableDateObjectInspector) this.objectInspector).create(new Date(0)); + } + + }.init(fieldObjInspector); + } private static VectorExpressionWriter genVectorExpressionWritableTimestamp( SettableTimestampObjectInspector fieldObjInspector) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index ad96fa5..e694db1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -30,6 +30,7 @@ import java.util.Map.Entry; import java.util.Set; import java.util.Stack; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -165,7 +166,7 @@ protected static transient final Log LOG = LogFactory.getLog(Vectorizer.class); - Set supportedDataTypes = new HashSet(); + Pattern supportedDataTypesPattern; List> vectorizableTasks = new ArrayList>(); Set> supportedGenericUDFs = new HashSet>(); @@ -175,19 +176,25 @@ private PhysicalContext physicalContext = null;; public Vectorizer() { - supportedDataTypes.add("int"); - supportedDataTypes.add("smallint"); - supportedDataTypes.add("tinyint"); - supportedDataTypes.add("bigint"); - supportedDataTypes.add("integer"); - supportedDataTypes.add("long"); - supportedDataTypes.add("short"); - supportedDataTypes.add("timestamp"); - supportedDataTypes.add("boolean"); - supportedDataTypes.add("string"); - supportedDataTypes.add("byte"); - supportedDataTypes.add("float"); - supportedDataTypes.add("double"); + + StringBuilder patternBuilder = new StringBuilder(); + patternBuilder.append("int"); + patternBuilder.append("|smallint"); + patternBuilder.append("|tinyint"); + patternBuilder.append("|bigint"); + patternBuilder.append("|integer"); + patternBuilder.append("|long"); + patternBuilder.append("|short"); + patternBuilder.append("|timestamp"); + patternBuilder.append("|boolean"); + patternBuilder.append("|string"); + patternBuilder.append("|byte"); + patternBuilder.append("|float"); + patternBuilder.append("|double"); + patternBuilder.append("|date"); + patternBuilder.append("|decimal.*"); + + supportedDataTypesPattern = Pattern.compile(patternBuilder.toString()); supportedGenericUDFs.add(GenericUDFOPPlus.class); supportedGenericUDFs.add(GenericUDFOPMinus.class); @@ -747,7 +754,7 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc) { } private boolean validateDataType(String type) { - return supportedDataTypes.contains(type.toLowerCase()); + return supportedDataTypesPattern.matcher(type.toLowerCase()).matches(); } private VectorizationContext getVectorizationContext(Operator op, diff --git ql/src/test/queries/clientpositive/vectorization_decimal_date.q ql/src/test/queries/clientpositive/vectorization_decimal_date.q new file mode 100644 index 0000000..2b82a5a --- /dev/null +++ ql/src/test/queries/clientpositive/vectorization_decimal_date.q @@ -0,0 +1,4 @@ +CREATE TABLE date_decimal_test STORED AS ORC AS SELECT cint, cdouble, CAST (CAST (cint AS TIMESTAMP) AS DATE) AS cdate, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal FROM alltypesorc; +SET hive.vectorized.execution.enabled=true; +EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10; +SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10; diff --git ql/src/test/results/clientpositive/vectorization_decimal_date.q.out ql/src/test/results/clientpositive/vectorization_decimal_date.q.out new file mode 100644 index 0000000..a4be3e7 --- /dev/null +++ ql/src/test/results/clientpositive/vectorization_decimal_date.q.out @@ -0,0 +1,71 @@ +PREHOOK: query: CREATE TABLE date_decimal_test STORED AS ORC AS SELECT cint, cdouble, CAST (CAST (cint AS TIMESTAMP) AS DATE) AS cdate, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal FROM alltypesorc +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +POSTHOOK: query: CREATE TABLE date_decimal_test STORED AS ORC AS SELECT cint, cdouble, CAST (CAST (cint AS TIMESTAMP) AS DATE) AS cdate, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal FROM alltypesorc +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@date_decimal_test +PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME date_decimal_test))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL cdate)) (TOK_SELEXPR (TOK_TABLE_OR_COL cdecimal))) (TOK_WHERE (AND (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL cint)) (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL cdouble)))) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + date_decimal_test + TableScan + alias: date_decimal_test + Filter Operator + predicate: + expr: (cint is not null and cdouble is not null) + type: boolean + Vectorized execution: true + Select Operator + expressions: + expr: cdate + type: date + expr: cdecimal + type: decimal(20,10) + outputColumnNames: _col0, _col1 + Vectorized execution: true + Limit + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: 10 + +PREHOOK: query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_decimal_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_decimal_test +#### A masked pattern was here #### +1970-01-06 -7959.5837837838 +1970-01-06 -2516.4135135135 +1970-01-06 -9445.0621621622 +1970-01-06 -5713.7459459459 +1970-01-06 8963.6405405405 +1970-01-06 4193.6243243243 +1970-01-06 2964.3864864865 +1970-01-06 -4673.2540540541 +1970-01-06 -9216.8945945946 +1970-01-06 -9287.3756756757