diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
index 3bfe4f9..7758ac4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
@@ -21,7 +21,9 @@
 import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * Class to keep information on a set of typed vector columns.  Used by
@@ -75,6 +77,7 @@
   // Given the keyIndex these arrays return:
   //   The ColumnVector.Type,
   //   The type specific index into longIndices, doubleIndices, etc...
+  protected TypeInfo[] typeInfos;
   protected ColumnVector.Type[] columnVectorTypes;
   protected int[] columnTypeSpecificIndices;
 
@@ -96,13 +99,15 @@ protected VectorColumnSetInfo(int keyCount) {
     intervalDayTimeIndices = new int[this.keyCount];
     addIntervalDayTimeIndex = 0;
 
+    typeInfos = new TypeInfo[this.keyCount];
     columnVectorTypes = new ColumnVector.Type[this.keyCount];
     columnTypeSpecificIndices = new int[this.keyCount];
   }
 
-  protected void addKey(ColumnVector.Type columnVectorType) throws HiveException {
+  protected void addKey(TypeInfo typeInfo) throws HiveException {
+    Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
     switch (columnVectorType) {
     case LONG:
     case DECIMAL_64:
@@ -133,6 +138,7 @@ protected void addKey(ColumnVector.Type columnVectorType) throws HiveException {
       throw new HiveException("Unexpected column vector type " + columnVectorType);
     }
 
+    typeInfos[addKeyIndex] = typeInfo;
     columnVectorTypes[addKeyIndex] = columnVectorType;
     addKeyIndex++;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
index 6ae6727..82dc4a7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
@@ -49,8 +49,7 @@ void init(VectorExpression[] keyExpressions) throws HiveException {
       VectorExpression keyExpression = keyExpressions[i];
 
       TypeInfo typeInfo = keyExpression.getOutputTypeInfo();
-      Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
-      addKey(columnVectorType);
+      addKey(typeInfo);
 
       // The output of the key expression is the input column.
       final int inputColumnNum = keyExpression.getOutputColumnNum();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
index eb870a7..7c01175 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
@@ -20,6 +20,7 @@
 import org.apache.hive.common.util.Murmur3;
 
+import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.Arrays;
 
@@ -29,8 +30,11 @@
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 import com.google.common.base.Preconditions;
 
@@ -350,7 +354,10 @@ public void assignNullDecimal(int keyIndex, int index) {
   }
 
   public void assignTimestamp(int index, Timestamp value) {
-    timestampValues[index] = value;
+    // Do not assign the input value object to the timestampValues array element.
+    // Always copy value using set* methods.
+    timestampValues[index].setTime(value.getTime());
+    timestampValues[index].setNanos(value.getNanos());
   }
 
   public void assignTimestamp(int index, TimestampColumnVector colVector, int elementNum) {
@@ -359,7 +366,9 @@ public void assignTimestamp(int index, TimestampColumnVector colVector, int elem
 
   public void assignNullTimestamp(int keyIndex, int index) {
     isNull[keyIndex] = true;
-    timestampValues[index] = ZERO_TIMESTAMP; // assign 0 to simplify hashcode
+    // assign 0 to simplify hashcode
+    timestampValues[index].setTime(ZERO_TIMESTAMP.getTime());
+    timestampValues[index].setNanos(ZERO_TIMESTAMP.getNanos());
   }
 
   public void assignIntervalDayTime(int index, HiveIntervalDayTime value) {
@@ -372,7 +381,158 @@ public void assignIntervalDayTime(int index, IntervalDayTimeColumnVector colVect
 
   public void assignNullIntervalDayTime(int keyIndex, int index) {
     isNull[keyIndex] = true;
-    intervalDayTimeValues[index] = ZERO_INTERVALDAYTIME; // assign 0 to simplify hashcode
+    intervalDayTimeValues[index].set(ZERO_INTERVALDAYTIME); // assign 0 to simplify hashcode
+  }
+
+  public String stringifyKeys(VectorColumnSetInfo columnSetInfo)
+  {
+    StringBuilder sb = new StringBuilder();
+    boolean isFirstKey = true;
+
+    if (longValues.length > 0) {
+      isFirstKey = false;
+      sb.append("longs ");
+      boolean isFirstValue = true;
+      for (int i = 0; i < columnSetInfo.longIndices.length; i++) {
+        if (isFirstValue) {
+          isFirstValue = false;
+        } else {
+          sb.append(", ");
+        }
+        int keyIndex = columnSetInfo.longIndices[i];
+        if (isNull[keyIndex]) {
+          sb.append("null");
+        } else {
+          sb.append(longValues[i]);
+          PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) columnSetInfo.typeInfos[keyIndex];
+          switch (primitiveTypeInfo.getPrimitiveCategory()) {
+          case DATE:
+            {
+              Date dt = new Date(0);
+              dt.setTime(DateWritable.daysToMillis((int) longValues[i]));
+              sb.append(" date ");
+              sb.append(dt.toString());
+            }
+            break;
+          default:
+            // Add nothing more.
+            break;
+          }
+        }
+      }
+    }
+    if (doubleValues.length > 0) {
+      if (isFirstKey) {
+        isFirstKey = false;
+      } else {
+        sb.append(", ");
+      }
+      sb.append("doubles ");
+      boolean isFirstValue = true;
+      for (int i = 0; i < columnSetInfo.doubleIndices.length; i++) {
+        if (isFirstValue) {
+          isFirstValue = false;
+        } else {
+          sb.append(", ");
+        }
+        int keyIndex = columnSetInfo.doubleIndices[i];
+        if (isNull[keyIndex]) {
+          sb.append("null");
+        } else {
+          sb.append(doubleValues[i]);
+        }
+      }
+    }
+    if (byteValues.length > 0) {
+      if (isFirstKey) {
+        isFirstKey = false;
+      } else {
+        sb.append(", ");
+      }
+      sb.append("byte lengths ");
+      boolean isFirstValue = true;
+      for (int i = 0; i < columnSetInfo.stringIndices.length; i++) {
+        if (isFirstValue) {
+          isFirstValue = false;
+        } else {
+          sb.append(", ");
+        }
+        int keyIndex = columnSetInfo.stringIndices[i];
+        if (isNull[keyIndex]) {
+          sb.append("null");
+        } else {
+          sb.append(byteLengths[i]);
+        }
+      }
+    }
+    if (decimalValues.length > 0) {
+      if (isFirstKey) {
+        isFirstKey = false;
+      } else {
+        sb.append(", ");
+      }
+      sb.append("decimals ");
+      boolean isFirstValue = true;
+      for (int i = 0; i < columnSetInfo.decimalIndices.length; i++) {
+        if (isFirstValue) {
+          isFirstValue = false;
+        } else {
+          sb.append(", ");
+        }
+        int keyIndex = columnSetInfo.decimalIndices[i];
+        if (isNull[keyIndex]) {
+          sb.append("null");
+        } else {
+          sb.append(decimalValues[i]);
+        }
+      }
+    }
+    if (timestampValues.length > 0) {
+      if (isFirstKey) {
+        isFirstKey = false;
+      } else {
+        sb.append(", ");
+      }
+      sb.append("timestamps ");
+      boolean isFirstValue = true;
+      for (int i = 0; i < columnSetInfo.timestampIndices.length; i++) {
+        if (isFirstValue) {
+          isFirstValue = false;
+        } else {
+          sb.append(", ");
+        }
+        int keyIndex = columnSetInfo.timestampIndices[i];
+        if (isNull[keyIndex]) {
+          sb.append("null");
+        } else {
+          sb.append(timestampValues[i]);
+        }
+      }
+    }
+    if (intervalDayTimeValues.length > 0) {
+      if (isFirstKey) {
+        isFirstKey = false;
+      } else {
+        sb.append(", ");
+      }
+      sb.append("interval day times ");
+      boolean isFirstValue = true;
+      for (int i = 0; i < columnSetInfo.intervalDayTimeIndices.length; i++) {
+        if (isFirstValue) {
+          isFirstValue = false;
+        } else {
+          sb.append(", ");
+        }
+        int keyIndex = columnSetInfo.intervalDayTimeIndices[i];
+        if (isNull[keyIndex]) {
+          sb.append("null");
+        } else {
+          sb.append(intervalDayTimeValues[i]);
+        }
+      }
+    }
+
+    return sb.toString();
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
index 2b401ac..43dd835 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a
@@ -106,132 +107,150 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException {
       keyIndex = longIndices[i];
       columnIndex = keyExpressions[keyIndex].getOutputColumnNum();
       LongColumnVector columnVector = (LongColumnVector) batch.cols[columnIndex];
-      if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
-        assignLongNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
-      } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
-        assignLongNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
-      } else if ((columnVector.noNulls || !columnVector.isNull[0]) && columnVector.isRepeating) {
-        assignLongNoNullsRepeating(i, batch.size, columnVector);
-      } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
-        assignLongNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector);
-      } else if (!columnVector.noNulls && columnVector.isRepeating) {
-        assignLongNullsRepeating(keyIndex, i, batch.size, columnVector);
-      } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
-        assignLongNullsNoRepeatingSelection (keyIndex, i, batch.size, columnVector, batch.selected);
+
+      if (columnVector.isRepeating) {
+        if (columnVector.noNulls || !columnVector.isNull[0]) {
+          assignLongNoNullsRepeating(i, batch.size, columnVector);
+        } else {
+          assignLongNullsRepeating(keyIndex, i, batch.size, columnVector);
+        }
+      } else if (columnVector.noNulls) {
+        if (batch.selectedInUse) {
+          assignLongNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
+        } else {
+          assignLongNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+        }
       } else {
-        throw new HiveException (String.format(
-            "Unimplemented Long null/repeat/selected combination %b/%b/%b",
-            columnVector.noNulls, columnVector.isRepeating, batch.selectedInUse));
+        if (batch.selectedInUse) {
+          assignLongNullsNoRepeatingSelection (keyIndex, i, batch.size, columnVector, batch.selected);
+        } else {
+          assignLongNullsNoRepeatingNoSelection(keyIndex, i, batch.size, columnVector);
+        }
       }
     }
     for(int i=0;i<doubleIndices.length; ++i) {

[... intervening diff content missing: the rest of this hunk and the header of the next file's diff ...]

           if (k > 0) {
             b.append(", ");
           }
-          cv.stringifyValue(b, i);
+          if (cv != null) {
+            try {
+              cv.stringifyValue(b, i);
+            } catch (Exception ex) {
+              b.append("<invalid>");
+            }
+          }
         }
         b.append(']');
         if (j < size - 1) {
           b.append('\n');
+          b.append(prefix);
         }
       }
     } else {
@@ -260,6 +267,7 @@ public String toString() {
         b.append(']');
         if (i < size - 1) {
           b.append('\n');
+          b.append(prefix);
         }
       }
     }
@@ -267,6 +275,11 @@ public String toString() {
   }
 
   @Override
+  public String toString() {
+    return stringify("");
+  }
+
+  @Override
   public void readFields(DataInput arg0) throws IOException {
     throw new UnsupportedOperationException("Do you really need me?");
   }
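
Note (illustration, not part of the patch): the assignTimestamp()/assignNullTimestamp() changes above switch from storing the caller's Timestamp reference to copying its value with setTime()/setNanos(). The minimal, self-contained sketch below (plain JDK only; the class name TimestampCopyDemo and the variable names are made up for this example) shows why aliasing is unsafe when the caller reuses a single mutable scratch object for every row, which is the usual pattern in vectorized readers.

import java.sql.Timestamp;

public class TimestampCopyDemo {
  public static void main(String[] args) {
    // A reader loop typically reuses one mutable scratch Timestamp for every row.
    Timestamp scratch = new Timestamp(0);

    // Old behavior: keep the reference that was handed in.
    Timestamp[] aliased = new Timestamp[2];

    // New behavior: preallocated slots, values copied in via setTime()/setNanos().
    Timestamp[] copied = { new Timestamp(0), new Timestamp(0) };

    long[] millis = { 1000L, 2000L };
    int[] nanos = { 111, 222 };

    for (int i = 0; i < 2; i++) {
      scratch.setTime(millis[i]);
      scratch.setNanos(nanos[i]);

      aliased[i] = scratch;                   // every slot now points at the same object

      copied[i].setTime(scratch.getTime());   // value copy, as the patched assignTimestamp() does
      copied[i].setNanos(scratch.getNanos());
    }

    // Prints "2000 2000": both aliased keys silently took the last value written to scratch.
    System.out.println(aliased[0].getTime() + " " + aliased[1].getTime());

    // Prints "1000 2000": the copied keys keep the values they were assigned.
    System.out.println(copied[0].getTime() + " " + copied[1].getTime());
  }
}

The same reasoning applies to the null-assignment paths: copying ZERO_TIMESTAMP and ZERO_INTERVALDAYTIME into the preallocated elements avoids handing out a reference to the shared constant, which a later set*() call on that key slot would otherwise mutate.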