commit 7f5bead88accf2ab21b16143db8e30bdfed9deab
Author: Owen O'Malley
Date:   Wed Jun 24 16:09:13 2015 -0700

    HIVE-11080. Make the VectorizedRowBatch.toString() not depend on VectorExpressionWriter.

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
index 23d6a5d..bf8918f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
@@ -379,8 +379,6 @@ public void processRow(Object key, Object value) throws IOException {
     } catch (Exception e) {
       String rowString = null;
       try {
-        /* batch.toString depends on this */
-        batch.setValueWriters(valueStringWriters[tag].toArray(new VectorExpressionWriter[0]));
         rowString = batch.toString();
       } catch (Exception e2) {
         rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index cdabe3a..d2f0a74 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -461,9 +461,6 @@ private void processVectorGroup(BytesWritable keyWritable,
     } catch (Exception e) {
       String rowString = null;
       try {
-        /* batch.toString depends on this */
-        batch.setValueWriters(valueStringWriters
-            .toArray(new VectorExpressionWriter[0]));
         rowString = batch.toString();
       } catch (Exception e2) {
         rowString = "[Error getting row data with exception "
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index 8ec7ead..c9a0fa2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -330,4 +330,18 @@ public void setElement(int outElementNum, int inputElementNum, ColumnVector inpu
   public void init() {
     initBuffer(0);
   }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append('"');
+      buffer.append(new String(vector[row], start[row], length[row]));
+      buffer.append('"');
+    } else {
+      buffer.append("null");
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 6654166..49d4c12 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -166,5 +166,13 @@ protected void flattenPush() {
 
   public void init() {
     // Do nothing by default
   }
+
+  /**
+   * Print the value for this column into the given string builder.
+   * @param buffer the buffer to print into
+   * @param row the id of the row to print
+   */
+  public abstract void stringifyValue(StringBuilder buffer,
+      int row);
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index 5009a42..0f63b29 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -85,6 +85,18 @@ public void setElement(int outElementNum, int inputElementNum, ColumnVector inpu
     }
   }
 
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row].toString());
+    } else {
+      buffer.append("null");
+    }
+  }
+
   public void set(int elementNum, HiveDecimalWritable writeable) {
     HiveDecimal hiveDec = writeable.getHiveDecimal(precision, scale);
     if (hiveDec == null) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index 525b3c5..013a9f5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -146,4 +146,16 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
     vector[outElementNum] = ((DoubleColumnVector) inputVector).vector[inputElementNum];
   }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row]);
+    } else {
+      buffer.append("null");
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index f0545fe..d900cc6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -190,4 +190,16 @@ public void flatten(boolean selectedInUse, int[] sel, int size) {
   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
     vector[outElementNum] = ((LongColumnVector) inputVector).vector[inputElementNum];
   }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row]);
+    } else {
+      buffer.append("null");
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
index 212aa99..86b74e7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
@@ -129,8 +129,6 @@ public void process(Object row, int tag) throws HiveException {
     }
 
     // Prepare output, set the projections
-    VectorExpressionWriter [] originalValueWriters = vrg.valueWriters;
-    vrg.setValueWriters(valueWriters);
     int[] originalProjections = vrg.projectedColumns;
     int originalProjectionSize = vrg.projectionSize;
     vrg.projectionSize = vExpressions.length;
@@ -140,7 +138,6 @@ public void process(Object row, int tag) throws HiveException {
     // Revert the projected columns back, because vrg will be re-used.
     vrg.projectionSize = originalProjectionSize;
     vrg.projectedColumns = originalProjections;
-    vrg.valueWriters = originalValueWriters;
   }
 
   static public String getOperatorName() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
index 7e41384..7c18da6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -20,13 +20,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Map.Entry;
 
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 
@@ -61,8 +55,6 @@
    */
   public static final int DEFAULT_SIZE = 1024;
 
-  public VectorExpressionWriter[] valueWriters = null;
-
   /**
    * Return a batch with the specified number of columns.
    * This is the standard constructor -- all batches should be the same size
@@ -112,7 +104,7 @@ public long count() {
     return size;
   }
 
-  private String toUTF8(Object o) {
+  private static String toUTF8(Object o) {
     if(o == null || o instanceof NullWritable) {
       return "\\N"; /* as found in LazySimpleSerDe's nullSequence */
     }
@@ -125,47 +117,39 @@ public String toString() {
       return "";
     }
     StringBuilder b = new StringBuilder();
-    try {
-      if (this.selectedInUse) {
-        for (int j = 0; j < size; j++) {
-          int i = selected[j];
-          for (int k = 0; k < projectionSize; k++) {
-            int projIndex = projectedColumns[k];
-            ColumnVector cv = cols[projIndex];
-            if (k > 0) {
-              b.append('\u0001');
-            }
-            if (cv.isRepeating) {
-              b.append(toUTF8(valueWriters[k].writeValue(cv, 0)));
-            } else {
-              b.append(toUTF8(valueWriters[k].writeValue(cv, i)));
-            }
-          }
-          if (j < size - 1) {
-            b.append('\n');
-          }
-        }
-      } else {
-        for (int i = 0; i < size; i++) {
-          for (int k = 0; k < projectionSize; k++) {
-            int projIndex = projectedColumns[k];
-            ColumnVector cv = cols[projIndex];
-            if (k > 0) {
-              b.append('\u0001');
-            }
-            if (cv.isRepeating) {
-              b.append(toUTF8(valueWriters[k].writeValue(cv, 0)));
-            } else {
-              b.append(toUTF8(valueWriters[k].writeValue(cv, i)));
-            }
-          }
-          if (i < size - 1) {
-            b.append('\n');
-          }
-        }
-      }
-    } catch (HiveException ex) {
-      throw new RuntimeException(ex);
-    }
+    if (this.selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = selected[j];
+        b.append('[');
+        for (int k = 0; k < projectionSize; k++) {
+          int projIndex = projectedColumns[k];
+          ColumnVector cv = cols[projIndex];
+          if (k > 0) {
+            b.append(", ");
+          }
+          cv.stringifyValue(b, i);
+        }
+        b.append(']');
+        if (j < size - 1) {
+          b.append('\n');
+        }
+      }
+    } else {
+      for (int i = 0; i < size; i++) {
+        b.append('[');
+        for (int k = 0; k < projectionSize; k++) {
+          int projIndex = projectedColumns[k];
+          ColumnVector cv = cols[projIndex];
+          if (k > 0) {
+            b.append(", ");
+          }
+          cv.stringifyValue(b, i);
+        }
+        b.append(']');
+        if (i < size - 1) {
+          b.append('\n');
+        }
+      }
+    }
     return b.toString();
   }
@@ -180,10 +164,6 @@ public void write(DataOutput arg0) throws IOException {
     throw new UnsupportedOperationException("Don't call me");
   }
 
-  public void setValueWriters(VectorExpressionWriter[] valueWriters) {
-    this.valueWriters = valueWriters;
-  }
-
   /**
    * Resets the row batch to default state
    *  - sets selectedInUse to false
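
For illustration only (not part of the commit): with the value writers gone, printing a batch needs nothing beyond the column vectors themselves. Below is a minimal sketch of the new output format, assuming the patched classes above are on the classpath; the demo class name and sample values are invented for the example.

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    // Hypothetical demo class, not part of the patch.
    public class StringifyDemo {
      public static void main(String[] args) {
        // A two-column batch: the constructor leaves the cols[] entries
        // null, so the caller supplies the concrete column vectors.
        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        LongColumnVector ids = new LongColumnVector();
        BytesColumnVector names = new BytesColumnVector();
        batch.cols[0] = ids;
        batch.cols[1] = names;

        ids.vector[0] = 1;
        ids.vector[1] = 2;
        byte[] alice = "alice".getBytes();
        byte[] bob = "bob".getBytes();
        names.setRef(0, alice, 0, alice.length); // stores by reference
        names.setRef(1, bob, 0, bob.length);
        batch.size = 2;

        // toString() now walks the projected columns and asks each
        // ColumnVector to stringify its own value; expected output:
        //   [1, "alice"]
        //   [2, "bob"]
        System.out.println(batch);
      }
    }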