diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java
index c303b30..f14f1d9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java
@@ -22,9 +22,9 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
public abstract class KeyWrapper {
- abstract void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException;
- abstract void setHashKey();
- abstract KeyWrapper copyKey();
- abstract void copyKey(KeyWrapper oldWrapper);
- abstract Object[] getKeyArray();
+ public abstract void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException;
+ public abstract void setHashKey();
+ public abstract KeyWrapper copyKey();
+ public abstract void copyKey(KeyWrapper oldWrapper);
+ public abstract Object[] getKeyArray();
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java
deleted file mode 100644
index 7437f7d..0000000
--- ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapper.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-/**
- * A hash map key wrapper for vectorized processing.
- * It stores the key values as primitives in arrays for each supported primitive type.
- * This works in conjunction with
- * {@link org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapperBatch VectorHashKeyWrapperBatch}
- * to hash vectorized processing units (batches).
- */
-public class VectorHashKeyWrapper extends KeyWrapper {
-
- private long[] longValues;
- private double[] doubleValues;
-
- private byte[][] byteValues;
- private int[] byteStarts;
- private int[] byteLengths;
-
- private boolean[] isNull;
- private int hashcode;
-
- public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount) {
- longValues = new long[longValuesCount];
- doubleValues = new double[doubleValuesCount];
- byteValues = new byte[byteValuesCount][];
- byteStarts = new int[byteValuesCount];
- byteLengths = new int[byteValuesCount];
- isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount];
- }
-
- private VectorHashKeyWrapper() {
- }
-
- @Override
- void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException {
- throw new HiveException("Should not be called");
- }
-
- @Override
- void setHashKey() {
- hashcode = Arrays.hashCode(longValues) ^
- Arrays.hashCode(doubleValues) ^
- Arrays.hashCode(isNull);
-
- // This code, with branches and all, is not executed if there are no string keys
- for (int i = 0; i < byteValues.length; ++i) {
- /*
- * Hashing the string is potentially expensive so is better to branch.
- * Additionally not looking at values for nulls allows us not reset the values.
- */
- if (!isNull[longValues.length + doubleValues.length + i]) {
- byte[] bytes = byteValues[i];
- int start = byteStarts[i];
- int length = byteLengths[i];
- if (length == bytes.length && start == 0) {
- hashcode ^= Arrays.hashCode(bytes);
- }
- else {
- // Unfortunately there is no Arrays.hashCode(byte[], start, length)
- for(int j = start; j < start + length; ++j) {
- // use 461 as is a (sexy!) prime.
- hashcode ^= 461 * bytes[j];
- }
- }
- }
- }
- }
-
- @Override
- public int hashCode() {
- return hashcode;
- }
-
- @Override
- public boolean equals(Object that) {
- if (that instanceof VectorHashKeyWrapper) {
- VectorHashKeyWrapper keyThat = (VectorHashKeyWrapper)that;
- return hashcode == keyThat.hashcode &&
- Arrays.equals(longValues, keyThat.longValues) &&
- Arrays.equals(doubleValues, keyThat.doubleValues) &&
- Arrays.equals(isNull, keyThat.isNull) &&
- byteValues.length == keyThat.byteValues.length &&
- (0 == byteValues.length || bytesEquals(keyThat));
- }
- return false;
- }
-
- private boolean bytesEquals(VectorHashKeyWrapper keyThat) {
- //By the time we enter here the byteValues.lentgh and isNull must have already been compared
- for (int i = 0; i < byteValues.length; ++i) {
- // the byte comparison is potentially expensive so is better to branch on null
- if (!isNull[longValues.length + doubleValues.length + i]) {
- if (0 != StringExpr.compare(
- byteValues[i],
- byteStarts[i],
- byteLengths[i],
- keyThat.byteValues[i],
- keyThat.byteStarts[i],
- keyThat.byteLengths[i])) {
- return false;
- }
- }
- }
- return true;
- }
-
- @Override
- protected Object clone() {
- VectorHashKeyWrapper clone = new VectorHashKeyWrapper();
- clone.longValues = longValues.clone();
- clone.doubleValues = doubleValues.clone();
- clone.isNull = isNull.clone();
-
- clone.byteValues = new byte[byteValues.length][];
- clone.byteStarts = new int[byteValues.length];
- clone.byteLengths = byteLengths.clone();
- for (int i = 0; i < byteValues.length; ++i) {
- // avoid allocation/copy of nulls, because it potentially expensive. branch instead.
- if (!isNull[i]) {
- clone.byteValues[i] = Arrays.copyOfRange(
- byteValues[i],
- byteStarts[i],
- byteStarts[i] + byteLengths[i]);
- }
- }
- clone.hashcode = hashcode;
- assert clone.equals(this);
- return clone;
- }
-
- @Override
- public KeyWrapper copyKey() {
- return (KeyWrapper) clone();
- }
-
- @Override
- void copyKey(KeyWrapper oldWrapper) {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- Object[] getKeyArray() {
- // TODO Auto-generated method stub
- return null;
- }
-
- public void assignDouble(int index, double d) {
- doubleValues[index] = d;
- isNull[longValues.length + index] = false;
- }
-
- public void assignNullDouble(int index) {
- doubleValues[index] = 0; // assign 0 to simplify hashcode
- isNull[longValues.length + index] = true;
- }
-
- public void assignLong(int index, long v) {
- longValues[index] = v;
- isNull[index] = false;
- }
-
- public void assignNullLong(int index) {
- longValues[index] = 0; // assign 0 to simplify hashcode
- isNull[index] = true;
- }
-
- public void assignString(int index, byte[] bytes, int start, int length) {
- byteValues[index] = bytes;
- byteStarts[index] = start;
- byteLengths[index] = length;
- isNull[longValues.length + doubleValues.length + index] = false;
- }
-
- public void assignNullString(int index) {
- // We do not assign the value to [] because the value is never used on null
- isNull[longValues.length + doubleValues.length + index] = true;
- }
-
- @Override
- public String toString()
- {
- return String.format("%d[%s] %d[%s] %d[%s]",
- longValues.length, Arrays.toString(longValues),
- doubleValues.length, Arrays.toString(doubleValues),
- byteValues.length, Arrays.toString(byteValues));
- }
-
- public boolean getIsNull(int i) {
- return isNull[i];
- }
-
- public long getLongValue(int i) {
- return longValues[i];
- }
-
- public double getDoubleValue(int i) {
- return doubleValues[i - longValues.length];
- }
-
- public byte[] getBytes(int i) {
- return byteValues[i - longValues.length - doubleValues.length];
- }
-
- public int getByteStart(int i) {
- return byteStarts[i - longValues.length - doubleValues.length];
- }
-
- public int getByteLength(int i) {
- return byteLengths[i - longValues.length - doubleValues.length];
- }
-
-
-}
-
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java
deleted file mode 100644
index 59bede4..0000000
--- ql/src/java/org/apache/hadoop/hive/ql/exec/VectorHashKeyWrapperBatch.java
+++ /dev/null
@@ -1,510 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec;
-
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.LongWritable;
-
-/**
- * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a
- * row batch in a vectorized fashion.
- * This class stores additional information about keys needed to evaluate and output the key values.
- *
- */
-public class VectorHashKeyWrapperBatch {
-
- /**
- * Helper class for looking up a key value based on key index
- *
- */
- private static class KeyLookupHelper {
- public int longIndex;
- public int doubleIndex;
- public int stringIndex;
- }
-
- /**
- * The key expressions that require evaluation and output the primitive values for each key.
- */
- private VectorExpression[] keyExpressions;
-
- /**
- * indices of LONG primitive keys
- */
- private int[] longIndices;
-
- /**
- * indices of DOUBLE primitive keys
- */
- private int[] doubleIndices;
-
- /*
- * indices of stirng (byte[]) primitive keys
- */
- private int[] stringIndices;
-
- /**
- * pre-allocated batch size vector of keys wrappers.
- * N.B. these keys are **mutable** and should never be used in a HashMap.
- * Always clone the key wrapper to obtain an immutable keywrapper suitable
- * to use a key in a HashMap.
- */
- private VectorHashKeyWrapper[] vectorHashKeyWrappers;
-
- /**
- * lookup vector to map from key index to primitive type index
- */
- private KeyLookupHelper[] indexLookup;
-
- /**
- * Accessor for the batch-sized array of key wrappers
- */
- public VectorHashKeyWrapper[] getVectorHashKeyWrappers() {
- return vectorHashKeyWrappers;
- }
-
- /**
- * Processes a batch:
- *
- * - Evaluates each key vector expression.
- * - Copies out each key's primitive values into the key wrappers
- * - computes the hashcode of the key wrappers
- *
- * @param batch
- * @throws HiveException
- */
- public void evaluateBatch (VectorizedRowBatch batch) throws HiveException {
- for(int i = 0; i < keyExpressions.length; ++i) {
- keyExpressions[i].evaluate(batch);
- }
- for(int i = 0; i< longIndices.length; ++i) {
- int keyIndex = longIndices[i];
- int columnIndex = keyExpressions[keyIndex].getOutputColumn();
- LongColumnVector columnVector = (LongColumnVector) batch.cols[columnIndex];
- if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
- assignLongNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
- } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
- assignLongNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
- } else if (columnVector.noNulls && columnVector.isRepeating) {
- assignLongNoNullsRepeating(i, batch.size, columnVector);
- } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
- assignLongNullsNoRepeatingNoSelection(i, batch.size, columnVector);
- } else if (!columnVector.noNulls && columnVector.isRepeating) {
- assignLongNullsRepeating(i, batch.size, columnVector);
- } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
- assignLongNullsNoRepeatingSelection (i, batch.size, columnVector, batch.selected);
- } else {
- throw new HiveException (String.format("Unimplemented Long null/repeat/selected combination %b/%b/%b",
- columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
- }
- }
- for(int i=0;i= 0) {
- return keyOutputWriter.writeValue(kw.getLongValue(i));
- } else if (klh.doubleIndex >= 0) {
- return keyOutputWriter.writeValue(kw.getDoubleValue(i));
- } else if (klh.stringIndex >= 0) {
- return keyOutputWriter.writeValue(
- kw.getBytes(i), kw.getByteStart(i), kw.getByteLength(i));
- } else {
- throw new HiveException(String.format(
- "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d",
- i, klh.longIndex, klh.doubleIndex, klh.stringIndex));
- }
- }
-}
-
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 07eccea..4634731 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -30,8 +30,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.KeyWrapper;
import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapper;
-import org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapperBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
new file mode 100644
index 0000000..01dd7be
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.KeyWrapper;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * A hash map key wrapper for vectorized processing.
+ * It stores the key values as primitives in arrays for each supported primitive type.
+ * This works in conjunction with
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch VectorHashKeyWrapperBatch}
+ * to hash vectorized processing units (batches).
+ */
+public class VectorHashKeyWrapper extends KeyWrapper {
+
+ private long[] longValues;
+ private double[] doubleValues;
+
+ private byte[][] byteValues;
+ private int[] byteStarts;
+ private int[] byteLengths;
+
+ private boolean[] isNull;
+ private int hashcode;
+
+ public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount) {
+ longValues = new long[longValuesCount];
+ doubleValues = new double[doubleValuesCount];
+ byteValues = new byte[byteValuesCount][];
+ byteStarts = new int[byteValuesCount];
+ byteLengths = new int[byteValuesCount];
+ isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount];
+ }
+
+ private VectorHashKeyWrapper() {
+ }
+
+ @Override
+ public void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException {
+ throw new HiveException("Should not be called");
+ }
+
+ @Override
+ public void setHashKey() {
+ hashcode = Arrays.hashCode(longValues) ^
+ Arrays.hashCode(doubleValues) ^
+ Arrays.hashCode(isNull);
+
+ // This code, with branches and all, is not executed if there are no string keys
+ for (int i = 0; i < byteValues.length; ++i) {
+ /*
+ * Hashing the string is potentially expensive so is better to branch.
+ * Additionally not looking at values for nulls allows us not reset the values.
+ */
+ if (!isNull[longValues.length + doubleValues.length + i]) {
+ byte[] bytes = byteValues[i];
+ int start = byteStarts[i];
+ int length = byteLengths[i];
+ if (length == bytes.length && start == 0) {
+ hashcode ^= Arrays.hashCode(bytes);
+ }
+ else {
+ // Unfortunately there is no Arrays.hashCode(byte[], start, length)
+ for(int j = start; j < start + length; ++j) {
+ // use 461 as is a (sexy!) prime.
+ hashcode ^= 461 * bytes[j];
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return hashcode;
+ }
+
+ @Override
+ public boolean equals(Object that) {
+ if (that instanceof VectorHashKeyWrapper) {
+ VectorHashKeyWrapper keyThat = (VectorHashKeyWrapper)that;
+ return hashcode == keyThat.hashcode &&
+ Arrays.equals(longValues, keyThat.longValues) &&
+ Arrays.equals(doubleValues, keyThat.doubleValues) &&
+ Arrays.equals(isNull, keyThat.isNull) &&
+ byteValues.length == keyThat.byteValues.length &&
+ (0 == byteValues.length || bytesEquals(keyThat));
+ }
+ return false;
+ }
+
+ private boolean bytesEquals(VectorHashKeyWrapper keyThat) {
+ //By the time we enter here the byteValues.length and isNull must have already been compared
+ for (int i = 0; i < byteValues.length; ++i) {
+ // the byte comparison is potentially expensive so is better to branch on null
+ if (!isNull[longValues.length + doubleValues.length + i]) {
+ if (0 != StringExpr.compare(
+ byteValues[i],
+ byteStarts[i],
+ byteLengths[i],
+ keyThat.byteValues[i],
+ keyThat.byteStarts[i],
+ keyThat.byteLengths[i])) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ @Override
+ protected Object clone() {
+ VectorHashKeyWrapper clone = new VectorHashKeyWrapper();
+ clone.longValues = longValues.clone();
+ clone.doubleValues = doubleValues.clone();
+ clone.isNull = isNull.clone();
+
+ clone.byteValues = new byte[byteValues.length][];
+ clone.byteStarts = new int[byteValues.length];
+ clone.byteLengths = byteLengths.clone();
+ for (int i = 0; i < byteValues.length; ++i) {
+ // avoid allocation/copy of nulls, because it is potentially expensive. branch instead.
+ if (!isNull[longValues.length + doubleValues.length + i]) {
+ clone.byteValues[i] = Arrays.copyOfRange(
+ byteValues[i],
+ byteStarts[i],
+ byteStarts[i] + byteLengths[i]);
+ }
+ }
+ clone.hashcode = hashcode;
+ assert clone.equals(this);
+ return clone;
+ }
+
+ @Override
+ public KeyWrapper copyKey() {
+ return (KeyWrapper) clone();
+ }
+
+ @Override
+ public void copyKey(KeyWrapper oldWrapper) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Object[] getKeyArray() {
+ throw new UnsupportedOperationException();
+ }
+
+ public void assignDouble(int index, double d) {
+ doubleValues[index] = d;
+ isNull[longValues.length + index] = false;
+ }
+
+ public void assignNullDouble(int index) {
+ doubleValues[index] = 0; // assign 0 to simplify hashcode
+ isNull[longValues.length + index] = true;
+ }
+
+ public void assignLong(int index, long v) {
+ longValues[index] = v;
+ isNull[index] = false;
+ }
+
+ public void assignNullLong(int index) {
+ longValues[index] = 0; // assign 0 to simplify hashcode
+ isNull[index] = true;
+ }
+
+ public void assignString(int index, byte[] bytes, int start, int length) {
+ byteValues[index] = bytes;
+ byteStarts[index] = start;
+ byteLengths[index] = length;
+ isNull[longValues.length + doubleValues.length + index] = false;
+ }
+
+ public void assignNullString(int index) {
+ // We do not assign the value to [] because the value is never used on null
+ isNull[longValues.length + doubleValues.length + index] = true;
+ }
+
+ @Override
+ public String toString()
+ {
+ return String.format("%d[%s] %d[%s] %d[%s]",
+ longValues.length, Arrays.toString(longValues),
+ doubleValues.length, Arrays.toString(doubleValues),
+ byteValues.length, Arrays.toString(byteValues));
+ }
+
+ public boolean getIsNull(int i) {
+ return isNull[i];
+ }
+
+ public long getLongValue(int i) {
+ return longValues[i];
+ }
+
+ public double getDoubleValue(int i) {
+ return doubleValues[i - longValues.length];
+ }
+
+ public byte[] getBytes(int i) {
+ return byteValues[i - longValues.length - doubleValues.length];
+ }
+
+ public int getByteStart(int i) {
+ return byteStarts[i - longValues.length - doubleValues.length];
+ }
+
+ public int getByteLength(int i) {
+ return byteLengths[i - longValues.length - doubleValues.length];
+ }
+
+
+}
+
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
new file mode 100644
index 0000000..5e547c7
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
@@ -0,0 +1,510 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a
+ * row batch in a vectorized fashion.
+ * This class stores additional information about keys needed to evaluate and output the key values.
+ *
+ */
+public class VectorHashKeyWrapperBatch {
+
+ /**
+ * Helper class for looking up a key value based on key index
+ *
+ */
+ private static class KeyLookupHelper {
+ public int longIndex;
+ public int doubleIndex;
+ public int stringIndex;
+ }
+
+ /**
+ * The key expressions that require evaluation and output the primitive values for each key.
+ */
+ private VectorExpression[] keyExpressions;
+
+ /**
+ * indices of LONG primitive keys
+ */
+ private int[] longIndices;
+
+ /**
+ * indices of DOUBLE primitive keys
+ */
+ private int[] doubleIndices;
+
+ /*
+ * indices of string (byte[]) primitive keys
+ */
+ private int[] stringIndices;
+
+ /**
+ * pre-allocated batch-size vector of key wrappers.
+ * N.B. these keys are **mutable** and should never be used in a HashMap.
+ * Always clone the key wrapper to obtain an immutable keywrapper suitable
+ * to use as a key in a HashMap.
+ */
+ private VectorHashKeyWrapper[] vectorHashKeyWrappers;
+
+ /**
+ * lookup vector to map from key index to primitive type index
+ */
+ private KeyLookupHelper[] indexLookup;
+
+ /**
+ * Accessor for the batch-sized array of key wrappers
+ */
+ public VectorHashKeyWrapper[] getVectorHashKeyWrappers() {
+ return vectorHashKeyWrappers;
+ }
+
+ /**
+ * Processes a batch:
+ *
+ * - Evaluates each key vector expression.
+ * - Copies out each key's primitive values into the key wrappers
+ * - computes the hashcode of the key wrappers
+ *
+ * @param batch
+ * @throws HiveException
+ */
+ public void evaluateBatch (VectorizedRowBatch batch) throws HiveException {
+ for(int i = 0; i < keyExpressions.length; ++i) {
+ keyExpressions[i].evaluate(batch);
+ }
+ for(int i = 0; i< longIndices.length; ++i) {
+ int keyIndex = longIndices[i];
+ int columnIndex = keyExpressions[keyIndex].getOutputColumn();
+ LongColumnVector columnVector = (LongColumnVector) batch.cols[columnIndex];
+ if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+ assignLongNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+ } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+ assignLongNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
+ } else if (columnVector.noNulls && columnVector.isRepeating) {
+ assignLongNoNullsRepeating(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+ assignLongNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && columnVector.isRepeating) {
+ assignLongNullsRepeating(i, batch.size, columnVector);
+ } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+ assignLongNullsNoRepeatingSelection (i, batch.size, columnVector, batch.selected);
+ } else {
+ throw new HiveException (String.format("Unimplemented Long null/repeat/selected combination %b/%b/%b",
+ columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
+ }
+ }
+ for(int i=0;i= 0) {
+ return keyOutputWriter.writeValue(kw.getLongValue(i));
+ } else if (klh.doubleIndex >= 0) {
+ return keyOutputWriter.writeValue(kw.getDoubleValue(i));
+ } else if (klh.stringIndex >= 0) {
+ return keyOutputWriter.writeValue(
+ kw.getBytes(i), kw.getByteStart(i), kw.getByteLength(i));
+ } else {
+ throw new HiveException(String.format(
+ "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d",
+ i, klh.longIndex, klh.doubleIndex, klh.stringIndex));
+ }
+ }
+}
+