diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 3ea6bb5..e525798 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -154,11 +154,13 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ update_where_partitioned.q,\ update_two_cols.q,\ vector_cast_constant.q,\ + vector_char_simple.q,\ vector_data_types.q,\ vector_decimal_aggregate.q,\ vector_left_outer_join.q,\ vector_mapjoin_reduce.q,\ vector_string_concat.q,\ + vector_varchar_simple.q,\ vectorization_0.q,\ vectorization_12.q,\ vectorization_13.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 0ce371e..941f97c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -100,7 +100,8 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep sources[tag] = new ReduceRecordSource(); sources[tag].init(jconf, reducer, redWork.getVectorMode(), keyTableDesc, valueTableDesc, - reader, tag == position, (byte) tag); + reader, tag == position, (byte) tag, + redWork.getScratchColumnVectorTypes()); ois[tag] = sources[tag].getObjectInspector(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 1ca62da..8b24e31 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -28,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; import org.apache.hadoop.hive.ql.log.PerfLogger; @@ -85,6 +87,7 @@ List row = new ArrayList(Utilities.reduceFieldNameList.size()); private DataOutputBuffer buffer; + private VectorizedRowBatchCtx batchContext; private VectorizedRowBatch batch; // number of columns pertaining to keys in a vectorized row batch @@ -110,7 +113,8 @@ private final boolean grouped = true; void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyTableDesc, - TableDesc valueTableDesc, KeyValuesReader reader, boolean handleGroupKey, byte tag) + TableDesc valueTableDesc, KeyValuesReader reader, boolean handleGroupKey, byte tag, + Map> scratchColumnVectorTypes) throws Exception { ObjectInspector keyObjectInspector; @@ -149,9 +153,6 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT /* vectorization only works with struct object inspectors */ valueStructInspectors = (StructObjectInspector) valueObjectInspector; - batch = VectorizedBatchUtil.constructVectorizedRowBatch(keyStructInspector, - valueStructInspectors); - final int totalColumns = keysColumnOffset + valueStructInspectors.getAllStructFieldRefs().size(); valueStringWriters = new 
ArrayList(totalColumns); @@ -178,6 +179,23 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT ois.add(field.getFieldObjectInspector()); } rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois); + + fields = ((StructObjectInspector) rowObjectInspector).getAllStructFieldRefs(); + int i = 0; + for (StructField field: fields) { + ObjectInspector fs = field.getFieldObjectInspector(); + System.out.println("ReduceRecordSource init rowObjectInspector field [" + i + "] " + field.getFieldName() + ", type " + fs.getTypeName()); + i++; + } + Map reduceShuffleScratchColumnTypeMap = + scratchColumnVectorTypes.get("_REDUCE_SHUFFLE_"); + System.out.println("ReduceRecordSource init reduceShuffleScratchColumnTypeMap " + reduceShuffleScratchColumnTypeMap); + batchContext = new VectorizedRowBatchCtx(); + batchContext.init(reduceShuffleScratchColumnTypeMap, (StructObjectInspector) rowObjectInspector); + batch = batchContext.createVectorizedRowBatch(); + for (i = 0; i < batch.numCols; i++) { + System.out.println("ReduceRecordSource init columnVector class [" + i + "] " + batch.cols[i].getClass().getSimpleName()); + } } else { ois.add(keyObjectInspector); ois.add(valueObjectInspector); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java index b573e3e..1ddcbc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -45,7 +46,8 @@ private int keyColCount; private int valueColCount; - private transient int [] projectedColumns = null; + private transient VectorizedRowBatch outputBatch; + private transient int remainingColCount; public VectorExtractOperator(VectorizationContext vContext, OperatorDesc conf) throws HiveException { @@ -57,26 +59,25 @@ public VectorExtractOperator() { super(); } - private StructObjectInspector makeStandardStructObjectInspector(StructObjectInspector structObjectInspector) { - List fields = structObjectInspector.getAllStructFieldRefs(); + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + StructObjectInspector structInputObjInspector = (StructObjectInspector) inputObjInspectors[0]; + List fields = structInputObjInspector.getAllStructFieldRefs(); ArrayList ois = new ArrayList(); ArrayList colNames = new ArrayList(); - for (StructField field: fields) { - colNames.add(field.getFieldName()); + for (int i = keyColCount; i < fields.size(); i++) { + StructField field = fields.get(i); + String fieldName = field.getFieldName(); + + // Remove "VALUE." prefix. 
+ int dotIndex = fieldName.indexOf("."); + colNames.add(fieldName.substring(dotIndex + 1)); ois.add(field.getFieldObjectInspector()); } - return ObjectInspectorFactory + outputObjInspector = ObjectInspectorFactory .getStandardStructObjectInspector(colNames, ois); - } - - @Override - protected void initializeOp(Configuration hconf) throws HiveException { - outputObjInspector = inputObjInspectors[0]; - LOG.info("VectorExtractOperator class of outputObjInspector is " + outputObjInspector.getClass().getName()); - projectedColumns = new int [valueColCount]; - for (int i = 0; i < valueColCount; i++) { - projectedColumns[i] = keyColCount + i; - } + remainingColCount = fields.size() - keyColCount; + outputBatch = new VectorizedRowBatch(remainingColCount); initializeChildren(hconf); } @@ -86,20 +87,16 @@ public void setKeyAndValueColCounts(int keyColCount, int valueColCount) { } @Override - // Evaluate vectorized batches of rows and forward them. + // Remove the key columns and forward the values (and scratch columns). public void processOp(Object row, int tag) throws HiveException { - VectorizedRowBatch vrg = (VectorizedRowBatch) row; + VectorizedRowBatch inputBatch = (VectorizedRowBatch) row; - // Project away the key columns... - int[] originalProjections = vrg.projectedColumns; - int originalProjectionSize = vrg.projectionSize; - vrg.projectionSize = valueColCount; - vrg.projectedColumns = this.projectedColumns; - - forward(vrg, outputObjInspector); + // Copy references to the input columns array starting after the keys... + for (int i = 0; i < remainingColCount; i++) { + outputBatch.cols[i] = inputBatch.cols[keyColCount + i]; + } + outputBatch.size = inputBatch.size; - // Revert the projected columns back, because vrg will be re-used. - vrg.projectionSize = originalProjectionSize; - vrg.projectedColumns = originalProjections; + forward(outputBatch, outputObjInspector); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java index e546dd1..ea32f33 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.io.IOException; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -27,16 +25,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeStats; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.io.ObjectWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; /** * File Sink operator implementation. 
@@ -69,113 +58,10 @@ protected void initializeOp(Configuration hconf) throws HiveException { @Override public void processOp(Object data, int tag) throws HiveException { - VectorizedRowBatch vrg = (VectorizedRowBatch)data; - - Writable [] records = null; - boolean vectorizedSerde = false; - try { - if (serializer instanceof VectorizedSerde) { - recordValue = ((VectorizedSerde) serializer).serializeVector(vrg, - inputObjInspectors[0]); - records = (Writable[]) ((ObjectWritable) recordValue).get(); - vectorizedSerde = true; - } - } catch (SerDeException e1) { - throw new HiveException(e1); - } - for (int i = 0; i < vrg.size; i++) { - Writable row = null; - if (vectorizedSerde) { - row = records[i]; - } else { - if (vrg.valueWriters == null) { - vrg.setValueWriters(this.valueWriters); - } - try { - row = serializer.serialize(getRowObject(vrg, i), inputObjInspectors[0]); - } catch (SerDeException ex) { - throw new HiveException(ex); - } - } - /* Create list bucketing sub-directory only if stored-as-directories is on. */ - String lbDirName = null; - lbDirName = (lbCtx == null) ? null : generateListBucketingDirName(row); - - FSPaths fpaths; - - if (!bDynParts && !filesCreated) { - if (lbDirName != null) { - FSPaths fsp2 = lookupListBucketingPaths(lbDirName); - } else { - createBucketFiles(fsp); - } - } - - try { - updateProgress(); - - // if DP is enabled, get the final output writers and prepare the real output row - assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT : "input object inspector is not struct"; - - if (bDynParts) { - // copy the DP column values from the input row to dpVals - dpVals.clear(); - dpWritables.clear(); - ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol, numDynParts, - (StructObjectInspector) inputObjInspectors[0], ObjectInspectorCopyOption.WRITABLE); - // get a set of RecordWriter based on the DP column values - // pass the null value along to the escaping process to determine what the dir should be - for (Object o : dpWritables) { - if (o == null || o.toString().length() == 0) { - dpVals.add(dpCtx.getDefaultPartitionName()); - } else { - dpVals.add(o.toString()); - } - } - fpaths = getDynOutPaths(dpVals, lbDirName); - - } else { - if (lbDirName != null) { - fpaths = lookupListBucketingPaths(lbDirName); - } else { - fpaths = fsp; - } - } - - rowOutWriters = fpaths.getOutWriters(); - // check if all record writers implement statistics. 
if atleast one RW - // doesn't implement stats interface we will fallback to conventional way - // of gathering stats - isCollectRWStats = areAllTrue(statsFromRecordWriter); - if (conf.isGatherStats() && !isCollectRWStats) { - if (statsCollectRawDataSize) { - SerDeStats stats = serializer.getSerDeStats(); - if (stats != null) { - fpaths.getStat().addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); - } - } - fpaths.getStat().addToStat(StatsSetupConst.ROW_COUNT, 1); - } - - - if (row_count != null) { - row_count.set(row_count.get() + 1); - } - - if (!multiFileSpray) { - rowOutWriters[0].write(row); - } else { - int keyHashCode = 0; - key.setHashCode(keyHashCode); - int bucketNum = prtner.getBucket(key, null, totalFiles); - int idx = bucketMap.get(bucketNum); - rowOutWriters[idx].write(row); - } - } catch (IOException e) { - throw new HiveException(e); - } + Object[] row = getRowObject(vrg, i); + super.processOp(row, tag); } } @@ -187,7 +73,7 @@ public void processOp(Object data, int tag) throws HiveException { } for (int i = 0; i < vrg.projectionSize; i++) { ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]]; - singleRow[i] = vrg.valueWriters[i].writeValue(vectorColumn, batchIndex); + singleRow[i] = valueWriters[i].writeValue(vectorColumn, batchIndex); } return singleRow; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index c77d002..21c757e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -140,6 +140,20 @@ public void init(Configuration hiveConf, String fileKey, /** + * Initializes the VectorizedRowBatch context based on a scratch column type map and + * object inspector. + * @param columnTypeMap Scratch column type map, keyed by column number + * @param rowOI + * Object inspector that shapes the column types + */ + public void init(Map columnTypeMap, + StructObjectInspector rowOI) { + this.columnTypeMap = columnTypeMap; + this.rowOI = rowOI; + this.rawRowOI = rowOI; + } + + /** * Initializes VectorizedRowBatch context based on the * split and Hive configuration (Job conf with hive Plan). 
* diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e77d41a..783f4c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -550,7 +550,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, protected final Map scratchColumnContext = new HashMap(); - protected final Map, VectorizationContext> vContextsByTSOp = + protected final Map, VectorizationContext> vContextsByOp = new HashMap, VectorizationContext>(); protected final Set> opsDone = @@ -589,10 +589,10 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack stac int i= stack.size()-2; while (vContext == null) { if (i < 0) { - throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName())); + return null; } Operator opParent = (Operator) stack.get(i); - vContext = vContextsByTSOp.get(opParent); + vContext = vContextsByOp.get(opParent); --i; } return vContext; @@ -611,7 +611,7 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack stac if (vectorOp instanceof VectorizationContextRegion) { VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext(); - vContextsByTSOp.put(op, vOutContext); + vContextsByOp.put(op, vOutContext); scratchColumnContext.put(vOutContext.getFileKey(), vOutContext); } } @@ -658,13 +658,20 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // vContext.setFileKey(onefile); scratchColumnContext.put(onefile, vContext); + if (LOG.isDebugEnabled()) { + LOG.debug("Vectorized MapWork operator " + op.getName() + " with vectorization context key=" + vContext.getFileKey() + + ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString()); + } break; } } } - vContextsByTSOp.put(op, vContext); + vContextsByOp.put(op, vContext); } else { vContext = walkStackToFindVectorizationContext(stack, op); + if (vContext == null) { + throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName())); + } } assert vContext != null; @@ -679,7 +686,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } - doVectorize(op, vContext); + Operator vectorOp = doVectorize(op, vContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " with vectorization context key=" + vContext.getFileKey() + + ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString()); + if (vectorOp instanceof VectorizationContextRegion) { + VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; + VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext(); + LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added new vectorization context key=" + vOutContext.getFileKey() + + ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vOutContext.getColumnMap().toString()); + } + } return null; } @@ -691,6 +709,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private int keyColCount; private int valueColCount; private Map reduceColumnNameMap; + + private 
VectorizationContext reduceShuffleVectorizationContext; private Operator rootVectorOp; @@ -704,6 +724,7 @@ public ReduceWorkVectorizationNodeProcessor(ReduceWork rWork, int keyColCount, i this.keyColCount = keyColCount; this.valueColCount = valueColCount; rootVectorOp = null; + reduceShuffleVectorizationContext = null; } @Override @@ -719,10 +740,21 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (op.getParentOperators().size() == 0) { vContext = getReduceVectorizationContext(reduceColumnNameMap); - vContextsByTSOp.put(op, vContext); + vContext.setFileKey("_REDUCE_SHUFFLE_"); + scratchColumnContext.put("_REDUCE_SHUFFLE_", vContext); + reduceShuffleVectorizationContext = vContext; saveRootVectorOp = true; + + if (LOG.isDebugEnabled()) { + LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context key=" + vContext.getFileKey() + + ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString()); + } } else { vContext = walkStackToFindVectorizationContext(stack, op); + if (vContext == null) { + // If we didn't find a context among the operators, assume the top -- reduce shuffle's vectorization context. + vContext = reduceShuffleVectorizationContext; + } } assert vContext != null; @@ -738,6 +770,17 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } Operator vectorOp = doVectorize(op, vContext); + + if (LOG.isDebugEnabled()) { + LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " with vectorization context key=" + vContext.getFileKey() + + ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString()); + if (vectorOp instanceof VectorizationContextRegion) { + VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; + VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext(); + LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added new vectorization context key=" + vOutContext.getFileKey() + + ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vOutContext.getColumnMap().toString()); + } + } if (vectorOp instanceof VectorGroupByOperator) { VectorGroupByOperator groupBy = (VectorGroupByOperator) vectorOp; VectorGroupByDesc vectorDesc = groupBy.getConf().getVectorDesc(); @@ -1003,11 +1046,6 @@ private boolean validateExtractOperator(ExtractOperator op) { } private boolean validateFileSinkOperator(FileSinkOperator op) { - // HIVE-7557: For now, turn off dynamic partitioning to give more time to - // figure out how to make VectorFileSink work correctly with it... - if (op.getConf().getDynPartCtx() != null) { - return false; - } return true; } diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q index ec46630..858fe16 100644 --- ql/src/test/queries/clientpositive/vector_char_simple.q +++ ql/src/test/queries/clientpositive/vector_char_simple.q @@ -41,3 +41,16 @@ order by key desc limit 5; drop table char_2; + + +-- Implicit conversion. Occurs in reduce-side under Tez. 
+create table char_3 ( + field char(12) +) stored as orc; + +explain +insert into table char_3 select cint from alltypesorc limit 10; + +insert into table char_3 select cint from alltypesorc limit 10; + +drop table char_3; diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q index 68d6b09..1cd30ee 100644 --- ql/src/test/queries/clientpositive/vector_varchar_simple.q +++ ql/src/test/queries/clientpositive/vector_varchar_simple.q @@ -1,12 +1,12 @@ SET hive.vectorized.execution.enabled=true; -drop table char_2; +drop table varchar_2; -create table char_2 ( +create table varchar_2 ( key varchar(10), value varchar(20) ) stored as orc; -insert overwrite table char_2 select * from src; +insert overwrite table varchar_2 select * from src; select key, value from src @@ -14,13 +14,13 @@ order by key asc limit 5; explain select key, value -from char_2 +from varchar_2 order by key asc limit 5; -- should match the query from src select key, value -from char_2 +from varchar_2 order by key asc limit 5; @@ -30,14 +30,26 @@ order by key desc limit 5; explain select key, value -from char_2 +from varchar_2 order by key desc limit 5; -- should match the query from src select key, value -from char_2 +from varchar_2 order by key desc limit 5; -drop table char_2; +drop table varchar_2; + +-- Implicit conversion. Occurs in reduce-side under Tez. +create table varchar_3 ( + field varchar(25) +) stored as orc; + +explain +insert into table varchar_3 select cint from alltypesorc limit 10; + +insert into table varchar_3 select cint from alltypesorc limit 10; + +drop table varchar_3; diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out index 53dfb2c..1ce5f4a 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out @@ -214,6 +214,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.over1k_part_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -300,6 +301,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.over1k_part_limit_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -368,6 +370,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.over1k_part_buck_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -435,6 +438,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.over1k_part_buck_sort_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -585,6 +589,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.over1k_part_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -671,6 +676,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.over1k_part_limit_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -739,6 +745,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.over1k_part_buck_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -806,6 +813,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.over1k_part_buck_sort_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -1362,6 +1370,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -1443,6 +1452,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -1837,6 +1847,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection @@ -1904,6 +1915,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2_orc + Execution mode: vectorized Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/tez/vector_char_simple.q.out ql/src/test/results/clientpositive/tez/vector_char_simple.q.out index bac33ec..fe651ca 100644 --- ql/src/test/results/clientpositive/tez/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/tez/vector_char_simple.q.out @@ -234,3 +234,109 @@ POSTHOOK: query: drop table char_2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@char_2 POSTHOOK: Output: default@char_2 +PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez. +create table char_3 ( + field char(12) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_3 +POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez. 
+create table char_3 ( + field char(12) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_3 +PREHOOK: query: explain +insert into table char_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert into table char_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( _col0 AS CHAR(12) (type: char(12)) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.char_3 + Execution mode: vectorized + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.char_3 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@char_3 +POSTHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@char_3 +POSTHOOK: Lineage: char_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: drop table char_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_3 +PREHOOK: Output: default@char_3 +POSTHOOK: query: drop table char_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_3 +POSTHOOK: Output: default@char_3 diff --git ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out index f097414..f3d9147 100644 --- ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out +++ 
ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out @@ -1,31 +1,31 @@ -PREHOOK: query: drop table char_2 +PREHOOK: query: drop table varchar_2 PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table char_2 +POSTHOOK: query: drop table varchar_2 POSTHOOK: type: DROPTABLE -PREHOOK: query: create table char_2 ( +PREHOOK: query: create table varchar_2 ( key varchar(10), value varchar(20) ) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@char_2 -POSTHOOK: query: create table char_2 ( +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: create table varchar_2 ( key varchar(10), value varchar(20) ) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@char_2 -PREHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: Output: default@varchar_2 +PREHOOK: query: insert overwrite table varchar_2 select * from src PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: default@char_2 -POSTHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: insert overwrite table varchar_2 select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: default@char_2 -POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Output: default@varchar_2 +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select key, value from src order by key asc @@ -46,12 +46,12 @@ POSTHOOK: Input: default@src 10 val_10 100 val_100 PREHOOK: query: explain select key, value -from char_2 +from varchar_2 order by key asc limit 5 PREHOOK: type: QUERY POSTHOOK: query: explain select key, value -from char_2 +from varchar_2 order by key asc limit 5 POSTHOOK: type: QUERY @@ -69,7 +69,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: char_2 + alias: varchar_2 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: varchar(10)), value (type: varchar(20)) @@ -107,19 +107,19 @@ STAGE PLANS: PREHOOK: query: -- should match the query from src select key, value -from char_2 +from varchar_2 order by key asc limit 5 PREHOOK: type: QUERY -PREHOOK: Input: default@char_2 +PREHOOK: Input: default@varchar_2 #### A masked pattern was here #### POSTHOOK: query: -- should match the query from src select key, value -from char_2 +from varchar_2 order by key asc limit 5 POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_2 +POSTHOOK: Input: default@varchar_2 #### A masked pattern was here #### 0 val_0 0 val_0 @@ -146,12 +146,12 @@ POSTHOOK: Input: default@src 97 val_97 96 val_96 PREHOOK: query: explain select key, value -from char_2 +from varchar_2 order by key desc limit 5 PREHOOK: type: QUERY POSTHOOK: query: explain select key, value -from char_2 +from varchar_2 order by key desc limit 5 POSTHOOK: type: QUERY @@ -169,7 +169,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: char_2 + alias: varchar_2 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: varchar(10)), value (type: varchar(20)) @@ -207,30 +207,136 @@ STAGE 
PLANS: PREHOOK: query: -- should match the query from src select key, value -from char_2 +from varchar_2 order by key desc limit 5 PREHOOK: type: QUERY -PREHOOK: Input: default@char_2 +PREHOOK: Input: default@varchar_2 #### A masked pattern was here #### POSTHOOK: query: -- should match the query from src select key, value -from char_2 +from varchar_2 order by key desc limit 5 POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_2 +POSTHOOK: Input: default@varchar_2 #### A masked pattern was here #### 98 val_98 98 val_98 97 val_97 97 val_97 96 val_96 -PREHOOK: query: drop table char_2 +PREHOOK: query: drop table varchar_2 PREHOOK: type: DROPTABLE -PREHOOK: Input: default@char_2 -PREHOOK: Output: default@char_2 -POSTHOOK: query: drop table char_2 +PREHOOK: Input: default@varchar_2 +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: drop table varchar_2 POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@char_2 -POSTHOOK: Output: default@char_2 +POSTHOOK: Input: default@varchar_2 +POSTHOOK: Output: default@varchar_2 +PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez. +create table varchar_3 ( + field varchar(25) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_3 +POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez. +create table varchar_3 ( + field varchar(25) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_3 +PREHOOK: query: explain +insert into table varchar_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert into table varchar_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( _col0 AS varchar(25)) (type: varchar(25)) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.varchar_3 + Execution mode: vectorized + + Stage: Stage-2 + 
Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.varchar_3 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@varchar_3 +POSTHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@varchar_3 +POSTHOOK: Lineage: varchar_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: drop table varchar_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_3 +PREHOOK: Output: default@varchar_3 +POSTHOOK: query: drop table varchar_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_3 +POSTHOOK: Output: default@varchar_3 diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out index 72dc8aa..fbe1b40 100644 --- ql/src/test/results/clientpositive/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -220,3 +220,98 @@ POSTHOOK: query: drop table char_2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@char_2 POSTHOOK: Output: default@char_2 +PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez. +create table char_3 ( + field char(12) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_3 +POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez. 
+create table char_3 ( + field char(12) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_3 +PREHOOK: query: explain +insert into table char_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert into table char_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( _col0 AS CHAR(12) (type: char(12)) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.char_3 + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.char_3 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@char_3 +POSTHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@char_3 +POSTHOOK: Lineage: char_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: drop table char_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_3 +PREHOOK: Output: default@char_3 +POSTHOOK: query: drop table char_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_3 +POSTHOOK: Output: default@char_3 diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out index 1c77c39..1c774af 100644 --- ql/src/test/results/clientpositive/vector_varchar_simple.q.out +++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out @@ -1,31 +1,31 @@ -PREHOOK: query: drop table char_2 +PREHOOK: query: drop table varchar_2 PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table char_2 +POSTHOOK: query: drop table varchar_2 POSTHOOK: type: DROPTABLE -PREHOOK: 
query: create table char_2 ( +PREHOOK: query: create table varchar_2 ( key varchar(10), value varchar(20) ) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@char_2 -POSTHOOK: query: create table char_2 ( +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: create table varchar_2 ( key varchar(10), value varchar(20) ) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@char_2 -PREHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: Output: default@varchar_2 +PREHOOK: query: insert overwrite table varchar_2 select * from src PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: default@char_2 -POSTHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: insert overwrite table varchar_2 select * from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: default@char_2 -POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Output: default@varchar_2 +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select key, value from src order by key asc @@ -46,12 +46,12 @@ POSTHOOK: Input: default@src 10 val_10 100 val_100 PREHOOK: query: explain select key, value -from char_2 +from varchar_2 order by key asc limit 5 PREHOOK: type: QUERY POSTHOOK: query: explain select key, value -from char_2 +from varchar_2 order by key asc limit 5 POSTHOOK: type: QUERY @@ -64,7 +64,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: char_2 + alias: varchar_2 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: varchar(10)), value (type: varchar(20)) @@ -100,19 +100,19 @@ STAGE PLANS: PREHOOK: query: -- should match the query from src select key, value -from char_2 +from varchar_2 order by key asc limit 5 PREHOOK: type: QUERY -PREHOOK: Input: default@char_2 +PREHOOK: Input: default@varchar_2 #### A masked pattern was here #### POSTHOOK: query: -- should match the query from src select key, value -from char_2 +from varchar_2 order by key asc limit 5 POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_2 +POSTHOOK: Input: default@varchar_2 #### A masked pattern was here #### 0 val_0 0 val_0 @@ -139,12 +139,12 @@ POSTHOOK: Input: default@src 97 val_97 96 val_96 PREHOOK: query: explain select key, value -from char_2 +from varchar_2 order by key desc limit 5 PREHOOK: type: QUERY POSTHOOK: query: explain select key, value -from char_2 +from varchar_2 order by key desc limit 5 POSTHOOK: type: QUERY @@ -157,7 +157,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: char_2 + alias: varchar_2 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: varchar(10)), value (type: varchar(20)) @@ -193,30 +193,125 @@ STAGE PLANS: PREHOOK: query: -- should match the query from src select key, value -from char_2 +from varchar_2 order by key desc limit 5 PREHOOK: type: QUERY -PREHOOK: Input: default@char_2 +PREHOOK: Input: default@varchar_2 #### A masked pattern was here #### POSTHOOK: query: -- should match 
the query from src select key, value -from char_2 +from varchar_2 order by key desc limit 5 POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_2 +POSTHOOK: Input: default@varchar_2 #### A masked pattern was here #### 98 val_98 98 val_98 97 val_97 97 val_97 96 val_96 -PREHOOK: query: drop table char_2 +PREHOOK: query: drop table varchar_2 PREHOOK: type: DROPTABLE -PREHOOK: Input: default@char_2 -PREHOOK: Output: default@char_2 -POSTHOOK: query: drop table char_2 +PREHOOK: Input: default@varchar_2 +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: drop table varchar_2 POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@char_2 -POSTHOOK: Output: default@char_2 +POSTHOOK: Input: default@varchar_2 +POSTHOOK: Output: default@varchar_2 +PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez. +create table varchar_3 ( + field varchar(25) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_3 +POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez. +create table varchar_3 ( + field varchar(25) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_3 +PREHOOK: query: explain +insert into table varchar_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert into table varchar_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( _col0 AS varchar(25)) (type: varchar(25)) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.varchar_3 + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.varchar_3 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: 
default@varchar_3 +POSTHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@varchar_3 +POSTHOOK: Lineage: varchar_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: drop table varchar_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_3 +PREHOOK: Output: default@varchar_3 +POSTHOOK: query: drop table varchar_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_3 +POSTHOOK: Output: default@varchar_3
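
Editorial note, not part of the patch: the sketch below gathers in one place how the pieces added above fit together. The Vectorizer registers the reduce-side scratch column types under the "_REDUCE_SHUFFLE_" file key, and ReduceRecordSource.init uses the new VectorizedRowBatchCtx.init(columnTypeMap, rowOI) overload to build its VectorizedRowBatch from them. The helper class and method names and the generic type parameters are assumptions for illustration; only the individual calls come from the patch itself.

import java.util.Map;

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

// Hypothetical helper mirroring the reduce-shuffle wiring added in this patch.
public final class ReduceShuffleBatchSketch {

  // Assumed shape of scratchColumnVectorTypes: file key -> (column number -> vector column type name).
  public static VectorizedRowBatch buildReduceShuffleBatch(
      Map<String, Map<Integer, String>> scratchColumnVectorTypes,
      StructObjectInspector reduceRowObjectInspector) throws Exception {

    // Scratch column types that the Vectorizer stored under the reduce-shuffle key.
    Map<Integer, String> reduceShuffleTypes =
        scratchColumnVectorTypes.get("_REDUCE_SHUFFLE_");

    // New init overload added by this patch: only the scratch column type map and
    // the reduce-side row object inspector are needed, no Configuration or file key.
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(reduceShuffleTypes, reduceRowObjectInspector);

    // The resulting batch has column vectors for the key/value columns described by
    // the object inspector plus the scratch columns requested by vectorized operators.
    return batchContext.createVectorizedRowBatch();
  }

  private ReduceShuffleBatchSketch() {
  }
}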