diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index 1e24710..514c6d6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -23,6 +23,8 @@
 import java.util.LinkedList;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
@@ -50,6 +52,7 @@
 import org.apache.hadoop.io.Text;
 
 public class VectorizedBatchUtil {
+  private static final Log LOG = LogFactory.getLog(VectorizedBatchUtil.class);
 
   /**
    * Sets the IsNull value for ColumnVector at specified index
@@ -232,169 +235,237 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi,
     final int off = colOffset;
     // Iterate thru the cols and load the batch
     for (int i = 0; i < fieldRefs.size(); i++) {
-      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
-      ObjectInspector foi = fieldRefs.get(i).getFieldObjectInspector();
+      setVector(row, oi, fieldRefs, batch, buffer, rowIndex, i, off);
+    }
+  }
 
-      // Vectorization only supports PRIMITIVE data types. Assert the same
-      assert (foi.getCategory() == Category.PRIMITIVE);
+  /**
+   * Iterates thru all the columns in a given row and populates the batch
+   * from a given offset
+   *
+   * @param row Deserialized row object
+   * @param oi Object inspector for that row
+   * @param rowIndex index to which the row should be added to batch
+   * @param batch Vectorized batch to which the row is added at rowIndex
+   * @param context context object for this vectorized batch
+   * @param buffer buffer that string and binary field values are written into
+   * @throws HiveException
+   */
+  public static void acidAddRowToBatch(Object row,
+                                       StructObjectInspector oi,
+                                       int rowIndex,
+                                       VectorizedRowBatch batch,
+                                       VectorizedRowBatchCtx context,
+                                       DataOutputBuffer buffer) throws HiveException {
+    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
+    // Iterate thru the cols and load the batch
+    for (int i = 0; i < fieldRefs.size(); i++) {
+      if (batch.cols[i] == null) {
+        // This means the column was not included in the projection from the underlying read
+        continue;
+      }
+      if (context.isPartitionCol(i)) {
+        // The value will have already been set before we're called, so don't overwrite it
+        continue;
+      }
+      setVector(row, oi, fieldRefs, batch, buffer, rowIndex, i, 0);
+    }
+  }
 
-      // Get writable object
-      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
-      Object writableCol = poi.getPrimitiveWritableObject(fieldData);
+  private static void setVector(Object row,
+                                StructObjectInspector oi,
+                                List<? extends StructField> fieldRefs,
+                                VectorizedRowBatch batch,
+                                DataOutputBuffer buffer,
+                                int rowIndex,
+                                int colIndex,
+                                int offset) throws HiveException {
 
-      // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
-      // float/double. String types have no default value for null.
-      switch (poi.getPrimitiveCategory()) {
-      case BOOLEAN: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
-      }
-      break;
-      case BYTE: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    Object fieldData = oi.getStructFieldData(row, fieldRefs.get(colIndex));
+    ObjectInspector foi = fieldRefs.get(colIndex).getFieldObjectInspector();
+
+    // Vectorization only supports PRIMITIVE data types. Assert the same
+    assert (foi.getCategory() == Category.PRIMITIVE);
+
+    // Get writable object
+    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
+    Object writableCol = poi.getPrimitiveWritableObject(fieldData);
+
+    // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
+    // float/double. String types have no default value for null.
+    switch (poi.getPrimitiveCategory()) {
+    case BOOLEAN: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
      }
-      break;
-      case SHORT: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+    break;
+    case BYTE: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
      }
-      break;
-      case INT: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+    break;
+    case SHORT: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
      }
-      break;
-      case LONG: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+    break;
+    case INT: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
      }
-      break;
-      case DATE: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+    break;
+    case LONG: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
      }
-      break;
-      case FLOAT: {
-        DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
-          dcv.isNull[rowIndex] = false;
-        } else {
-          dcv.vector[rowIndex] = Double.NaN;
-          setNullColIsNullValue(dcv, rowIndex);
-        }
+    }
+    break;
+    case DATE: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
      }
-      break;
-      case DOUBLE: {
-        DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
-          dcv.isNull[rowIndex] = false;
-        } else {
-          dcv.vector[rowIndex] = Double.NaN;
-          setNullColIsNullValue(dcv, rowIndex);
-        }
+    }
+    break;
+    case FLOAT: {
+      DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
+        dcv.isNull[rowIndex] = false;
+      } else {
+        dcv.vector[rowIndex] = Double.NaN;
+        setNullColIsNullValue(dcv, rowIndex);
      }
-      break;
-      case TIMESTAMP: {
-        LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
-          lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
-          lcv.isNull[rowIndex] = false;
-        } else {
-          lcv.vector[rowIndex] = 1;
-          setNullColIsNullValue(lcv, rowIndex);
-        }
+    }
+    break;
+    case DOUBLE: {
+      DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
+        dcv.isNull[rowIndex] = false;
+      } else {
+        dcv.vector[rowIndex] = Double.NaN;
+        setNullColIsNullValue(dcv, rowIndex);
      }
-      break;
-      case BINARY: {
-        BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
-          bcv.isNull[rowIndex] = false;
-          BytesWritable bw = (BytesWritable) writableCol;
-          byte[] bytes = bw.getBytes();
-          int start = buffer.getLength();
-          int length = bytes.length;
-          try {
-            buffer.write(bytes, 0, length);
-          } catch (IOException ioe) {
-            throw new IllegalStateException("bad write", ioe);
-          }
-          bcv.setRef(rowIndex, buffer.getData(), start, length);
-        } else {
-          setNullColIsNullValue(bcv, rowIndex);
-        }
+    }
+    break;
+    case TIMESTAMP: {
+      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
+        lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
+        lcv.isNull[rowIndex] = false;
+      } else {
+        lcv.vector[rowIndex] = 1;
+        setNullColIsNullValue(lcv, rowIndex);
      }
-      break;
-      case STRING: {
-        BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
-        if (writableCol != null) {
+    }
+    break;
+    case BINARY: {
+      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
        bcv.isNull[rowIndex] = false;
-        Text colText = (Text) writableCol;
+        BytesWritable bw = (BytesWritable) writableCol;
+        byte[] bytes = bw.getBytes();
        int start = buffer.getLength();
-        int length = colText.getLength();
+        int length = bytes.length;
        try {
-          buffer.write(colText.getBytes(), 0, length);
+          buffer.write(bytes, 0, length);
        } catch (IOException ioe) {
          throw new IllegalStateException("bad write", ioe);
        }
        bcv.setRef(rowIndex, buffer.getData(), start, length);
-      } else {
-        setNullColIsNullValue(bcv, rowIndex);
+      } else {
+        setNullColIsNullValue(bcv, rowIndex);
+      }
+    }
+    break;
+    case STRING: {
+      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        bcv.isNull[rowIndex] = false;
+        Text colText = (Text) writableCol;
+        int start = buffer.getLength();
+        int length = colText.getLength();
+        try {
+          buffer.write(colText.getBytes(), 0, length);
+        } catch (IOException ioe) {
+          throw new IllegalStateException("bad write", ioe);
        }
+        bcv.setRef(rowIndex, buffer.getData(), start, length);
+      } else {
+        setNullColIsNullValue(bcv, rowIndex);
      }
-      break;
-      case CHAR: {
-        BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
+    }
+    break;
+    case CHAR: {
+      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        bcv.isNull[rowIndex] = false;
+        HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
+        byte[] bytes = colHiveChar.getStrippedValue().getBytes();
+
+        // We assume the CHAR maximum length was enforced when the object was created.
+        int length = bytes.length;
+
+        int start = buffer.getLength();
+        try {
+          // In vector mode, we store CHAR as unpadded.
+          buffer.write(bytes, 0, length);
+        } catch (IOException ioe) {
+          throw new IllegalStateException("bad write", ioe);
+        }
+        bcv.setRef(rowIndex, buffer.getData(), start, length);
+      } else {
+        setNullColIsNullValue(bcv, rowIndex);
+      }
+    }
+    break;
+    case VARCHAR: {
+      BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
      if (writableCol != null) {
        bcv.isNull[rowIndex] = false;
-        HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
-        byte[] bytes = colHiveChar.getStrippedValue().getBytes();
-
-        // We assume the CHAR maximum length was enforced when the object was created.
+        HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
+        byte[] bytes = colHiveVarchar.getValue().getBytes();
+
+        // We assume the VARCHAR maximum length was enforced when the object was created.
        int length = bytes.length;
 
        int start = buffer.getLength();
        try {
-          // In vector mode, we store CHAR as unpadded.
          buffer.write(bytes, 0, length);
        } catch (IOException ioe) {
          throw new IllegalStateException("bad write", ioe);
        }
        bcv.setRef(rowIndex, buffer.getData(), start, length);
      } else {
@@ -405,45 +476,21 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi,
        }
      }
      break;
-    case VARCHAR: {
-      BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
-      if (writableCol != null) {
-        bcv.isNull[rowIndex] = false;
-        HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
-        byte[] bytes = colHiveVarchar.getValue().getBytes();
-
-        // We assume the VARCHAR maximum length was enforced when the object was created.
-        int length = bytes.length;
-
-        int start = buffer.getLength();
-        try {
-          buffer.write(bytes, 0, length);
-        } catch (IOException ioe) {
-          throw new IllegalStateException("bad write", ioe);
-        }
-        bcv.setRef(rowIndex, buffer.getData(), start, length);
-      } else {
-        setNullColIsNullValue(bcv, rowIndex);
-      }
-    }
-    break;
-    case DECIMAL:
-      DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i];
-      if (writableCol != null) {
-        dcv.isNull[rowIndex] = false;
-        HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
-        dcv.vector[rowIndex].update(wobj.getHiveDecimal().unscaledValue(),
-            (short) wobj.getScale());
-      } else {
-        setNullColIsNullValue(dcv, rowIndex);
-      }
-      break;
-    default:
-      throw new HiveException("Vectorizaton is not supported for datatype:"
-          + poi.getPrimitiveCategory());
+    case DECIMAL:
+      DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
+      if (writableCol != null) {
+        dcv.isNull[rowIndex] = false;
+        HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
+        dcv.vector[rowIndex].update(wobj.getHiveDecimal().unscaledValue(),
+            (short) wobj.getScale());
+      } else {
+        setNullColIsNullValue(dcv, rowIndex);
+      }
+      break;
+    default:
+      throw new HiveException("Vectorization is not supported for datatype:"
+          + poi.getPrimitiveCategory());
    }
  }
-  }
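
Reviewer's sketch (not part of the patch): the behavioral core of acidAddRowToBatch above is the pair of continue branches. The standalone Java below models just that rule; ProjectionContext and its bare long[] columns are hypothetical stand-ins for VectorizedRowBatch and VectorizedRowBatchCtx. A null column vector means "projected out", and a partition column keeps whatever value was stamped before rows were appended.

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class ProjectionSkipSketch {
      // Hypothetical stand-in for VectorizedRowBatch.cols plus the partition-column
      // positions that VectorizedRowBatchCtx now tracks.
      static final class ProjectionContext {
        final long[][] cols;          // cols[i] == null means column i was not projected
        final Set<Integer> partCols;  // indexes the context reports via isPartitionCol()
        ProjectionContext(long[][] cols, Set<Integer> partCols) {
          this.cols = cols;
          this.partCols = partCols;
        }
      }

      // Mirrors the skip rule in acidAddRowToBatch: write a value only for columns
      // that are projected and are not partition columns.
      static void addRow(ProjectionContext ctx, int rowIndex, long[] row) {
        for (int i = 0; i < row.length; i++) {
          if (ctx.cols[i] == null) {
            continue; // not in the projection; the reader never materialized it
          }
          if (ctx.partCols.contains(i)) {
            continue; // already stamped for the whole batch; don't overwrite
          }
          ctx.cols[i][rowIndex] = row[i];
        }
      }

      public static void main(String[] args) {
        // Column 0 is projected data, column 1 was projected out, column 2 is a partition column.
        long[][] cols = {new long[4], null, new long[4]};
        Arrays.fill(cols[2], 42L); // as if addPartitionColsToBatch had stamped the batch
        ProjectionContext ctx = new ProjectionContext(cols, new HashSet<>(Arrays.asList(2)));
        addRow(ctx, 0, new long[]{7L, 99L, -1L});
        System.out.println(cols[0][0] + " " + cols[2][0]); // prints: 7 42
      }
    }
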
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 22f5f5e..81a8ee7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -22,10 +22,12 @@
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -84,7 +86,11 @@
   private Map partitionValues;
 
   //partition types
-  private Map partitionTypes;
+  private Map partitionTypes;
+
+  // partition column positions, for use by classes that need to know whether a given column is a
+  // partition column
+  private Set<Integer> partitionCols;
 
   // Column projection list - List of column indexes to include. This
   // list does not contain partition columns
@@ -203,12 +209,13 @@ public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundEx
       // Check to see if this split is part of a partition of a table
       String pcols = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
+      String[] partKeys = null;
 
      if (pcols != null && pcols.length() > 0) {
 
        // Partitions exist for this table. Get the partition object inspector and
        // raw row object inspector (row with out partition col)
        LinkedHashMap<String, String> partSpec = part.getPartSpec();
-        String[] partKeys = pcols.trim().split("/");
+        partKeys = pcols.trim().split("/");
        String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
        String[] partKeyTypes = pcolTypes.trim().split(":");
@@ -262,6 +269,15 @@ public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundEx
            .asList(new StructObjectInspector[] {partRawRowObjectInspector, partObjectInspector}));
        rowOI = rowObjectInspector;
        rawRowOI = partRawRowObjectInspector;
+
+        // We have to do this after we've set rowOI, as getColIndexBasedOnColName uses it
+        partitionCols = new HashSet<Integer>();
+        if (pcols != null && pcols.length() > 0) {
+          for (int i = 0; i < partKeys.length; i++) {
+            partitionCols.add(getColIndexBasedOnColName(partKeys[i]));
+          }
+        }
+
      } else {
 
        // No partitions for this table, hence row OI equals raw row OI
@@ -586,6 +602,16 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti
    }
  }
 
+  /**
+   * Determine whether a given column is a partition column
+   * @param colnum column number in
+   * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}s created by this context.
+   * @return true if it is a partition column, false otherwise
+   */
+  public final boolean isPartitionCol(int colnum) {
+    return (partitionCols == null) ? false : partitionCols.contains(colnum);
+  }
+
  private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException {
    if (columnTypeMap != null && !columnTypeMap.isEmpty()) {
      int origNumCols = vrb.numCols;
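
One detail worth calling out in VectorizedRowBatchCtx.init above: partitionCols can only be built after rowOI is assigned, because getColIndexBasedOnColName resolves names through it. Below is a self-contained sketch of that ordering, with a plain list of column names playing the role of the row object inspector; all identifiers here are hypothetical, not Hive APIs.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class PartitionColIndexSketch {
      private List<String> rowColumnNames; // stands in for rowOI
      private Set<Integer> partitionCols;  // same role as the new field in the patch

      void init(String pcols, List<String> nonPartitionColumns) {
        String[] partKeys = (pcols != null && pcols.length() > 0) ? pcols.trim().split("/") : null;
        // Build the full row schema first (non-partition columns, then partition keys),
        // mirroring how the patch only resolves indexes after rowOI is assigned.
        rowColumnNames = new ArrayList<>(nonPartitionColumns);
        if (partKeys != null) {
          rowColumnNames.addAll(Arrays.asList(partKeys));
        }
        partitionCols = new HashSet<>();
        if (partKeys != null) {
          for (String key : partKeys) {
            partitionCols.add(rowColumnNames.indexOf(key));
          }
        }
      }

      boolean isPartitionCol(int colnum) {
        return (partitionCols == null) ? false : partitionCols.contains(colnum);
      }

      public static void main(String[] args) {
        PartitionColIndexSketch ctx = new PartitionColIndexSketch();
        ctx.init("ds", Arrays.asList("a", "b"));
        System.out.println(ctx.isPartitionCol(0) + " " + ctx.isPartitionCol(2)); // false true
      }
    }
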
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
index ca90fc5..a8e5c2e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
@@ -48,7 +48,6 @@
   private final OrcStruct value;
   private final VectorizedRowBatchCtx rowBatchCtx;
   private final ObjectInspector objectInspector;
-  private boolean needToSetPartition = true;
   private final DataOutputBuffer buffer = new DataOutputBuffer();
 
   VectorizedOrcAcidRowReader(AcidInputFormat.RowReader<OrcStruct> inner,
@@ -83,23 +82,20 @@ public boolean next(NullWritable nullWritable,
     if (!innerReader.next(key, value)) {
       return false;
     }
-    if (needToSetPartition) {
-      try {
-        rowBatchCtx.addPartitionColsToBatch(vectorizedRowBatch);
-      } catch (HiveException e) {
-        throw new IOException("Problem adding partition column", e);
-      }
-      needToSetPartition = false;
+    try {
+      rowBatchCtx.addPartitionColsToBatch(vectorizedRowBatch);
+    } catch (HiveException e) {
+      throw new IOException("Problem adding partition column", e);
     }
     try {
-      VectorizedBatchUtil.addRowToBatch(value,
+      VectorizedBatchUtil.acidAddRowToBatch(value,
          (StructObjectInspector) objectInspector,
-          vectorizedRowBatch.size++, vectorizedRowBatch, buffer);
+          vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer);
      while (vectorizedRowBatch.size < vectorizedRowBatch.selected.length &&
          innerReader.next(key, value)) {
-        VectorizedBatchUtil.addRowToBatch(value,
+        VectorizedBatchUtil.acidAddRowToBatch(value,
            (StructObjectInspector) objectInspector,
-            vectorizedRowBatch.size++, vectorizedRowBatch, buffer);
+            vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer);
      }
    } catch (HiveException he) {
      throw new IOException("error iterating", he);
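
The reader change above drops the one-shot needToSetPartition flag: partition columns are now stamped on every batch, and acidAddRowToBatch leaves them untouched while rows are appended. Below is a condensed, hypothetical model of that per-batch ordering; the real reader works on VectorizedRowBatch and ORC structs, so this keeps only the control flow.

    import java.util.Arrays;
    import java.util.Iterator;

    public class BatchLoopSketch {
      // Reads all rows in batches of maxRows, stamping the partition value once per batch
      // before any row is appended — the order next() follows after this patch.
      static void readAll(Iterator<Long> rows, long partitionValue) {
        final int maxRows = 3; // stands in for vectorizedRowBatch.selected.length
        long[] dataCol = new long[maxRows];
        long[] partCol = new long[maxRows];
        while (rows.hasNext()) {
          // Previously this happened only for the first batch (needToSetPartition);
          // now every batch is stamped, so a reused batch always carries the value.
          Arrays.fill(partCol, partitionValue);
          int size = 0;
          while (size < maxRows && rows.hasNext()) {
            dataCol[size++] = rows.next(); // the append step skips partCol entirely
          }
          System.out.println(size + " rows, partition=" + partCol[0]);
        }
      }

      public static void main(String[] args) {
        readAll(Arrays.asList(1L, 2L, 3L, 4L, 5L).iterator(), 42L);
        // prints: 3 rows, partition=42
        //         2 rows, partition=42
      }
    }
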
diff --git ql/src/test/queries/clientpositive/acid_vectorization_partition.q ql/src/test/queries/clientpositive/acid_vectorization_partition.q
new file mode 100644
index 0000000..9348d05
--- /dev/null
+++ ql/src/test/queries/clientpositive/acid_vectorization_partition.q
@@ -0,0 +1,10 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10;
+insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10;
+set hive.vectorized.execution.enabled=true;
+select * from acid_vectorized_part order by a, b;
diff --git ql/src/test/queries/clientpositive/acid_vectorization_project.q ql/src/test/queries/clientpositive/acid_vectorization_project.q
new file mode 100644
index 0000000..a44b57a
--- /dev/null
+++ ql/src/test/queries/clientpositive/acid_vectorization_project.q
@@ -0,0 +1,11 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10;
+set hive.vectorized.execution.enabled=true;
+select a,b from acid_vectorized order by a;
+select a,c from acid_vectorized order by a;
+select b,c from acid_vectorized order by b;
diff --git ql/src/test/results/clientpositive/acid_vectorization_partition.q.out ql/src/test/results/clientpositive/acid_vectorization_partition.q.out
new file mode 100644
index 0000000..ee97cc9
--- /dev/null
+++ ql/src/test/results/clientpositive/acid_vectorization_partition.q.out
@@ -0,0 +1,60 @@
+PREHOOK: query: CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized_part
+POSTHOOK: query: CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized_part
+PREHOOK: query: insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized_part@ds=today
+POSTHOOK: query: insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized_part@ds=today
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=today).a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=today).b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized_part@ds=tomorrow
+POSTHOOK: query: insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized_part@ds=tomorrow
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=tomorrow).a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=tomorrow).b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: select * from acid_vectorized_part order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized_part
+PREHOOK: Input: default@acid_vectorized_part@ds=today
+PREHOOK: Input: default@acid_vectorized_part@ds=tomorrow
+#### A masked pattern was here ####
+POSTHOOK: query: select * from acid_vectorized_part order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized_part
+POSTHOOK: Input: default@acid_vectorized_part@ds=today
+POSTHOOK: Input: default@acid_vectorized_part@ds=tomorrow
+#### A masked pattern was here ####
+-1073279343 oj1YrV5Wa today
+-1073279343 oj1YrV5Wa tomorrow
+-1073051226 A34p7oRr2WvUJNf tomorrow
+-1073051226 A34p7oRr2WvUJNf today
+-1072910839 0iqrc5 tomorrow
+-1072910839 0iqrc5 today
+-1072081801 dPkN74F7 today
+-1072081801 dPkN74F7 tomorrow
+-1072076362 2uLyD28144vklju213J1mr today
+-1072076362 2uLyD28144vklju213J1mr tomorrow
+-1071480828 aw724t8c5558x2xneC624 tomorrow
+-1071480828 aw724t8c5558x2xneC624 today
+-1071363017 Anj0oF today
+-1071363017 Anj0oF tomorrow
+-1070883071 0ruyd6Y50JpdGRf6HqD tomorrow
+-1070883071 0ruyd6Y50JpdGRf6HqD today
+-1070551679 iUR3Q today
+-1070551679 iUR3Q tomorrow
+-1069736047 k17Am8uPHWk02cEf1jet tomorrow
+-1069736047 k17Am8uPHWk02cEf1jet today
diff --git ql/src/test/results/clientpositive/acid_vectorization_project.q.out ql/src/test/results/clientpositive/acid_vectorization_project.q.out
new file mode 100644
index 0000000..1bdacb9
--- /dev/null
+++ ql/src/test/results/clientpositive/acid_vectorization_project.q.out
@@ -0,0 +1,73 @@
+PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.c SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+PREHOOK: query: select a,b from acid_vectorized order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b from acid_vectorized order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+-1073279343 oj1YrV5Wa
+-1073051226 A34p7oRr2WvUJNf
+-1072910839 0iqrc5
+-1072081801 dPkN74F7
+-1072076362 2uLyD28144vklju213J1mr
+-1071480828 aw724t8c5558x2xneC624
+-1071363017 Anj0oF
+-1070883071 0ruyd6Y50JpdGRf6HqD
+-1070551679 iUR3Q
+-1069736047 k17Am8uPHWk02cEf1jet
+PREHOOK: query: select a,c from acid_vectorized order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select a,c from acid_vectorized order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+-1073279343 11.0
+-1073051226 NULL
+-1072910839 11.0
+-1072081801 NULL
+-1072076362 NULL
+-1071480828 -51.0
+-1071363017 8.0
+-1070883071 NULL
+-1070551679 NULL
+-1069736047 11.0
+PREHOOK: query: select b,c from acid_vectorized order by b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select b,c from acid_vectorized order by b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+0iqrc5 11.0
+0ruyd6Y50JpdGRf6HqD NULL
+2uLyD28144vklju213J1mr NULL
+A34p7oRr2WvUJNf NULL
+Anj0oF 8.0
+aw724t8c5558x2xneC624 -51.0
+dPkN74F7 NULL
+iUR3Q NULL
+k17Am8uPHWk02cEf1jet 11.0
+oj1YrV5Wa 11.0