diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/BaseVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/BaseVectorExpression.java
new file mode 100644
index 0000000000..766fab0f88
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/BaseVectorExpression.java
@@ -0,0 +1,179 @@
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Base class for unary vector expressions which don't want to copy the plain old
+ * loop-in-loop-and-call-business-function logic.
+ *
+ * <p>{@link #evaluate(VectorizedRowBatch)} handles the repeating, selected and null-aware
+ * cases and calls {@link #func} once for every row whose input value is not null.
+ *
+ * @param <I> type of the input column vector
+ * @param <O> type of the output column vector
+ */
+public abstract class BaseVectorExpression<I extends ColumnVector, O extends ColumnVector>
+    extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+  /** Index in the batch of the column to read; -1 when no input column was supplied. */
+  protected final int inputColumn;
+
+  /**
+   * @param inputColumn index in the batch of the column to read
+   * @param outputColumnNum index in the batch of the column to write
+   */
+  public BaseVectorExpression(int inputColumn, int outputColumnNum) {
+    super(outputColumnNum);
+    this.inputColumn = inputColumn;
+  }
+
+  /**
+   * Creates an expression without an input column; {@code inputColumn} is set to -1.
+   *
+   * <p>NOTE(review): {@link #evaluate} reads {@code batch.cols[inputColumn]}, so it cannot work
+   * on an instance built this way. The FuncXToY subclasses (e.g. FuncLongToDecimal) still declare
+   * a private {@code inputColumn} of their own - verify that they hand the column to the
+   * two-argument constructor, otherwise {@code evaluate} sees -1 here.
+   *
+   * @param outputColumnNum index in the batch of the column to write
+   */
+  public BaseVectorExpression(int outputColumnNum) {
+    super(outputColumnNum);
+    this.inputColumn = -1;
+  }
+
+  /** No-argument constructor; leaves {@code inputColumn} at -1. */
+  public BaseVectorExpression() {
+    super();
+    // Dummy final assignments.
+    inputColumn = -1;
+  }
+
+  /**
+   * Evaluates the child expressions, then calls {@link #func} for each row of the batch whose
+   * input value is not null, maintaining {@code isNull}, {@code noNulls} and
+   * {@code isRepeating} of the output column.
+   *
+   * @param batch the batch holding the input and output columns
+   * @throws HiveException if thrown while evaluating the child expressions
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Nothing here verifies the runtime vector types, so the casts are unchecked; doing them
+    // once keeps the suppression scoped to these two declarations.
+    @SuppressWarnings("unchecked")
+    final I inputColVector = (I) batch.cols[inputColumn];
+    @SuppressWarnings("unchecked")
+    final O outputColVector = (O) batch.cols[outputColumnNum];
+    final int[] sel = batch.selected;
+    final int n = batch.size;
+    final boolean[] inputIsNull = inputColVector.isNull;
+    final boolean[] outputIsNull = outputColVector.isNull;
+
+    // Per-batch hook; note that it runs even when the batch turns out to be empty.
+    beforeLoop(outputColVector, inputColVector);
+
+    if (n == 0) {
+      // Nothing to do
+      return;
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for (int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for (int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for (int i = 0; i != n; i++) {
+          func(outputColVector, inputColVector, i);
+        }
+      }
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      } else {
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
+        for (int i = 0; i != n; i++) {
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Hook called once per {@link #evaluate} call, after the child expressions have been
+   * evaluated and before any row is processed. The default implementation does nothing.
+   *
+   * @param outputColVector the output column vector of the current batch
+   * @param inputColVector the input column vector of the current batch
+   */
+  protected void beforeLoop(O outputColVector, I inputColVector) {
+  }
+
+  /**
+   * Computes the output value of a single row; only called for rows whose input is not null.
+   *
+   * @param outputColVector the vector to write the result to
+   * @param inputColVector the vector to read the input value from
+   * @param batchIndex the index of the row within the column vectors
+   */
+  protected abstract void func(O outputColVector, I inputColVector, int batchIndex);
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java index eb925af64c..7bdb490d4e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java @@ -51,12 +51,9 @@ protected void func(DecimalColumnVector outV, DoubleColumnVector inV, int i) { @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DOUBLE) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1) + .setArgumentTypes(VectorExpressionDescriptor.ArgumentType.DOUBLE) + .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDecimal.java index 
fa88e3ffb0..31aa1c23fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDecimal.java @@ -23,9 +23,8 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; /** - * To be used to cast long and boolean to decimal. - * This works for boolean too because boolean is encoded as 0 - * for false and 1 for true. + * To be used to cast long and boolean to decimal. This works for boolean too because boolean is + * encoded as 0 for false and 1 for true. */ public class CastLongToDecimal extends FuncLongToDecimal { @@ -39,7 +38,6 @@ public CastLongToDecimal(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); } - @Override protected void func(DecimalColumnVector outV, LongColumnVector inV, int i) { outV.set(i, HiveDecimal.create(inV.vector[i])); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConvertDecimal64ToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConvertDecimal64ToDecimal.java index 04a2cba62a..2af73f3da0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConvertDecimal64ToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConvertDecimal64ToDecimal.java @@ -37,7 +37,6 @@ public ConvertDecimal64ToDecimal(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); } - @Override protected void func(DecimalColumnVector outV, LongColumnVector inV, int i) { outV.vector[i].deserialize64(inV.vector[i], ((Decimal64ColumnVector) inV).scale); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java index 09215509de..385d8ec0c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java @@ -18,19 +18,16 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import java.util.Arrays; - import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.metadata.HiveException; /** - * This is a superclass for unary decimal functions and expressions returning doubles that - * operate directly on the input and set the output. + * This is a superclass for unary decimal functions and expressions returning doubles that operate + * directly on the input and set the output. */ -public abstract class FuncDecimalToDouble extends VectorExpression { +public abstract class FuncDecimalToDouble + extends BaseVectorExpression { private static final long serialVersionUID = 1L; private final int inputColumn; @@ -46,99 +43,6 @@ public FuncDecimalToDouble() { inputColumn = -1; } - abstract protected void func(DoubleColumnVector outputColVector, DecimalColumnVector inputColVector, int i); - - @Override - public void evaluate(VectorizedRowBatch batch) throws HiveException { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumn]; - int[] sel = batch.selected; - int n = batch.size; - DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; - - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - - if (n == 0) { - - // Nothing to do - return; - } - - // We do not need to do a column reset since we are carefully changing the output. 
- outputColVector.isRepeating = false; - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls || !inputIsNull[0]) { - // Set isNull before call in case it changes it mind. - outputIsNull[0] = false; - func(outputColVector, inputColVector, 0); - } else { - outputIsNull[0] = true; - outputColVector.noNulls = false; - } - outputColVector.isRepeating = true; - return; - } - - if (inputColVector.noNulls) { - if (batch.selectedInUse) { - - // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. - - if (!outputColVector.noNulls) { - for(int j = 0; j != n; j++) { - final int i = sel[j]; - // Set isNull before call in case it changes it mind. - outputIsNull[i] = false; - func(outputColVector, inputColVector, i); - } - } else { - for(int j = 0; j != n; j++) { - final int i = sel[j]; - func(outputColVector, inputColVector, i); - } - } - } else { - if (!outputColVector.noNulls) { - - // Assume it is almost always a performance win to fill all of isNull so we can - // safely reset noNulls. - Arrays.fill(outputIsNull, false); - outputColVector.noNulls = true; - } - for(int i = 0; i != n; i++) { - func(outputColVector, inputColVector, i); - } - } - } else /* there are nulls in the inputColVector */ { - - // Carefully handle NULLs... 
- outputColVector.noNulls = false; - - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector.isNull[i]; - if (!inputColVector.isNull[i]) { - func(outputColVector, inputColVector, i); - } - } - } else { - System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); - for(int i = 0; i != n; i++) { - if (!inputColVector.isNull[i]) { - func(outputColVector, inputColVector, i); - } - } - } - } - } - @Override public String vectorExpressionParameters() { return getColumnParamString(0, inputColumn); @@ -147,12 +51,9 @@ public String vectorExpressionParameters() { @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DECIMAL) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1) + .setArgumentTypes(VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java index 7fc8f6ab40..8ed98cc29c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java @@ -18,19 +18,16 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import java.util.Arrays; - import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.metadata.HiveException; /** - * This is a superclass for unary double functions and expressions returning decimals that - * operate directly on the input and set the output. + * This is a superclass for unary double functions and expressions returning decimals that operate + * directly on the input and set the output. */ -public abstract class FuncDoubleToDecimal extends VectorExpression { +public abstract class FuncDoubleToDecimal + extends BaseVectorExpression { private static final long serialVersionUID = 1L; private final int inputColumn; @@ -46,99 +43,6 @@ public FuncDoubleToDecimal() { inputColumn = -1; } - abstract protected void func(DecimalColumnVector outputColVector, DoubleColumnVector inputColVector, int i); - - @Override - public void evaluate(VectorizedRowBatch batch) throws HiveException { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[inputColumn]; - int[] sel = batch.selected; - int n = batch.size; - DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; - - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - - if (n == 0) { - - // Nothing to do - return; - } - - // We do not need to do a column reset since we are carefully changing the output. - outputColVector.isRepeating = false; - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls || !inputIsNull[0]) { - // Set isNull before call in case it changes it mind. 
- outputIsNull[0] = false; - func(outputColVector, inputColVector, 0); - } else { - outputIsNull[0] = true; - outputColVector.noNulls = false; - } - outputColVector.isRepeating = true; - return; - } - - if (inputColVector.noNulls) { - if (batch.selectedInUse) { - - // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. - - if (!outputColVector.noNulls) { - for(int j = 0; j != n; j++) { - final int i = sel[j]; - // Set isNull before call in case it changes it mind. - outputIsNull[i] = false; - func(outputColVector, inputColVector, i); - } - } else { - for(int j = 0; j != n; j++) { - final int i = sel[j]; - func(outputColVector, inputColVector, i); - } - } - } else { - if (!outputColVector.noNulls) { - - // Assume it is almost always a performance win to fill all of isNull so we can - // safely reset noNulls. - Arrays.fill(outputIsNull, false); - outputColVector.noNulls = true; - } - for(int i = 0; i != n; i++) { - func(outputColVector, inputColVector, i); - } - } - } else /* there are nulls in the inputColVector */ { - - // Carefully handle NULLs... 
- outputColVector.noNulls = false; - - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector.isNull[i]; - if (!inputColVector.isNull[i]) { - func(outputColVector, inputColVector, i); - } - } - } else { - System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); - for(int i = 0; i != n; i++) { - if (!inputColVector.isNull[i]) { - func(outputColVector, inputColVector, i); - } - } - } - } - } - public String vectorExpressionParameters() { return getColumnParamString(0, inputColumn); } @@ -146,12 +50,9 @@ public String vectorExpressionParameters() { @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1) + .setArgumentTypes(VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY) + .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java index d3136c3ed6..be19dda429 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java @@ -18,19 +18,16 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import java.util.Arrays; - import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.metadata.HiveException; /** - * This is a superclass for unary long functions and expressions returning decimals that - * operate directly on the input and set the output. + * This is a superclass for unary long functions and expressions returning decimals that operate + * directly on the input and set the output. */ -public abstract class FuncLongToDecimal extends VectorExpression { +public abstract class FuncLongToDecimal + extends BaseVectorExpression { private static final long serialVersionUID = 1L; private final int inputColumn; @@ -46,99 +43,6 @@ public FuncLongToDecimal() { inputColumn = -1; } - abstract protected void func(DecimalColumnVector outputColVector, LongColumnVector inputColVector, int i); - - @Override - public void evaluate(VectorizedRowBatch batch) throws HiveException { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - LongColumnVector inputColVector = (LongColumnVector) batch.cols[inputColumn]; - int[] sel = batch.selected; - int n = batch.size; - DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; - - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - - if (n == 0) { - - // Nothing to do - return; - } - - // We do not need to do a column reset since we are carefully changing the output. - outputColVector.isRepeating = false; - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls || !inputIsNull[0]) { - // Set isNull before call in case it changes it mind. 
- outputIsNull[0] = false; - func(outputColVector, inputColVector, 0); - } else { - outputIsNull[0] = true; - outputColVector.noNulls = false; - } - outputColVector.isRepeating = true; - return; - } - - if (inputColVector.noNulls) { - if (batch.selectedInUse) { - - // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. - - if (!outputColVector.noNulls) { - for(int j = 0; j != n; j++) { - final int i = sel[j]; - // Set isNull before call in case it changes it mind. - outputIsNull[i] = false; - func(outputColVector, inputColVector, i); - } - } else { - for(int j = 0; j != n; j++) { - final int i = sel[j]; - func(outputColVector, inputColVector, i); - } - } - } else { - if (!outputColVector.noNulls) { - - // Assume it is almost always a performance win to fill all of isNull so we can - // safely reset noNulls. - Arrays.fill(outputIsNull, false); - outputColVector.noNulls = true; - } - for(int i = 0; i != n; i++) { - func(outputColVector, inputColVector, i); - } - } - } else /* there are nulls in the inputColVector */ { - - // Carefully handle NULLs... 
- outputColVector.noNulls = false; - - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector.isNull[i]; - if (!inputColVector.isNull[i]) { - func(outputColVector, inputColVector, i); - } - } - } else { - System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); - for(int i = 0; i != n; i++) { - if (!inputColVector.isNull[i]) { - func(outputColVector, inputColVector, i); - } - } - } - } - } - public String vectorExpressionParameters() { return getColumnParamString(0, inputColumn); } @@ -146,12 +50,9 @@ public String vectorExpressionParameters() { @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.INT_FAMILY) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1) + .setArgumentTypes(VectorExpressionDescriptor.ArgumentType.INT_FAMILY) + .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java index 5805afcf23..db356b2caf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import java.sql.Timestamp; import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -26,7 +25,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import 
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.util.DateTimeMath; /** * Superclass to support vectorized functions that take a long