diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 536e418..cdc76d1 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -153,11 +153,13 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   update_where_partitioned.q,\
   update_two_cols.q,\
   vector_cast_constant.q,\
+  vector_char_simple.q,\
   vector_data_types.q,\
   vector_decimal_aggregate.q,\
   vector_left_outer_join.q,\
   vector_mapjoin_reduce.q,\
   vector_string_concat.q,\
+  vector_varchar_simple.q,\
   vectorization_12.q,\
   vectorization_13.q,\
   vectorization_14.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
index 990a4f1..a144ced 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
@@ -40,6 +40,7 @@
 import org.apache.hadoop.hive.ql.exec.tez.tools.InputMerger;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
@@ -101,6 +102,7 @@ List<Object> row = new ArrayList<Object>(Utilities.reduceFieldNameList.size());
 
   private DataOutputBuffer buffer;
+  private VectorizedRowBatchCtx[] batchContexts;
   private VectorizedRowBatch[] batches;
   // number of columns pertaining to keys in a vectorized row batch
   private int keysColumnOffset;
 
@@ -148,6 +150,7 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep
     if(vectorized) {
       final int maxTags = redWork.getTagToValueDesc().size();
       keyStructInspector = (StructObjectInspector)keyObjectInspector;
+      batchContexts = new VectorizedRowBatchCtx[maxTags];
       batches = new VectorizedRowBatch[maxTags];
       valueStructInspectors = new StructObjectInspector[maxTags];
       valueStringWriters = new List[maxTags];
@@ -171,8 +174,6 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep
 
           /* vectorization only works with struct object inspectors */
           valueStructInspectors[tag] = (StructObjectInspector)valueObjectInspector[tag];
-          batches[tag] = VectorizedBatchUtil.constructVectorizedRowBatch(keyStructInspector,
-              valueStructInspectors[tag]);
           final int totalColumns = keysColumnOffset +
               valueStructInspectors[tag].getAllStructFieldRefs().size();
           valueStringWriters[tag] = new ArrayList<VectorExpressionWriter>(totalColumns);
@@ -200,6 +201,11 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep
           }
           rowObjectInspector[tag] = ObjectInspectorFactory
               .getStandardStructObjectInspector(colNames, ois);
+
+          Map<Integer, String> scratchColumnTypeMap = redWork.getScratchColumnVectorTypes().get("_REDUCE_SHUFFLE_");
+          batchContexts[tag] = new VectorizedRowBatchCtx();
+          batchContexts[tag].init(scratchColumnTypeMap, (StructObjectInspector) rowObjectInspector[tag]);
+          batches[tag] = batchContexts[tag].createVectorizedRowBatch();
         } else {
           ois.add(keyObjectInspector);
           ois.add(valueObjectInspector[tag]);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index c77d002..98022cd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -137,7 +137,21 @@ public void init(Configuration hiveConf, String fileKey,
 
     this.rowOI= rowOI;
     this.rawRowOI = rowOI;
   }
- 
+
+  /**
+   * Initializes the VectorizedRowBatch context based on a scratch column type map and an
+   * object inspector.
+   * @param columnTypeMap
+   * @param rowOI
+   *          Object inspector that shapes the column types
+   */
+  public void init(Map<Integer, String> columnTypeMap,
+      StructObjectInspector rowOI) {
+    this.columnTypeMap = columnTypeMap;
+    this.rowOI = rowOI;
+    this.rawRowOI = rowOI;
+  }
+
   /**
    * Initializes VectorizedRowBatch context based on the
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index e682dba..c186047 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -550,7 +550,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     protected final Map<String, VectorizationContext> scratchColumnContext =
         new HashMap<String, VectorizationContext>();
 
-    protected final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
+    protected final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByOp =
         new HashMap<Operator<? extends OperatorDesc>, VectorizationContext>();
 
     protected final Set<Operator<? extends OperatorDesc>> opsDone =
@@ -589,10 +589,10 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stac
       int i= stack.size()-2;
       while (vContext == null) {
         if (i < 0) {
-          throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName()));
+          return null;
         }
         Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
-        vContext = vContextsByTSOp.get(opParent);
+        vContext = vContextsByOp.get(opParent);
         --i;
       }
       return vContext;
@@ -611,7 +611,7 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stac
       if (vectorOp instanceof VectorizationContextRegion) {
         VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
         VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
-        vContextsByTSOp.put(op, vOutContext);
+        vContextsByOp.put(op, vOutContext);
         scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
       }
     }
@@ -658,13 +658,20 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
             // vContext.setFileKey(onefile);
             scratchColumnContext.put(onefile, vContext);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Vectorized MapWork operator " + op.getName() + " with vectorization context key=" + vContext.getFileKey() +
+                  ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString());
+            }
             break;
           }
         }
       }
 
-      vContextsByTSOp.put(op, vContext);
+      vContextsByOp.put(op, vContext);
     } else {
       vContext = walkStackToFindVectorizationContext(stack, op);
+      if (vContext == null) {
+        throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName()));
+      }
     }
 
     assert vContext != null;
@@ -679,7 +686,18 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       return null;
     }
 
-    doVectorize(op, vContext);
+    Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " with vectorization context key=" + vContext.getFileKey() +
+          ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString());
+      if (vectorOp instanceof VectorizationContextRegion) {
+        VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+        VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
+        LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added new vectorization context key=" + vOutContext.getFileKey() +
+            ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vOutContext.getColumnMap().toString());
+      }
+    }
 
     return null;
   }
@@ -691,6 +709,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     private int keyColCount;
     private int valueColCount;
    private Map<String, Integer> reduceColumnNameMap;
+
+    private VectorizationContext reduceShuffleVectorizationContext;
 
     private Operator<? extends OperatorDesc> rootVectorOp;
 
@@ -704,6 +724,7 @@ public ReduceWorkVectorizationNodeProcessor(ReduceWork rWork, int keyColCount, i
       this.keyColCount = keyColCount;
       this.valueColCount = valueColCount;
       rootVectorOp = null;
+      reduceShuffleVectorizationContext = null;
     }
 
     @Override
@@ -719,10 +740,21 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 
       if (op.getParentOperators().size() == 0) {
         vContext = getReduceVectorizationContext(reduceColumnNameMap);
-        vContextsByTSOp.put(op, vContext);
+        vContext.setFileKey("_REDUCE_SHUFFLE_");
+        scratchColumnContext.put("_REDUCE_SHUFFLE_", vContext);
+        reduceShuffleVectorizationContext = vContext;
         saveRootVectorOp = true;
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context key=" + vContext.getFileKey() +
+              ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString());
+        }
       } else {
         vContext = walkStackToFindVectorizationContext(stack, op);
+        if (vContext == null) {
+          // If we did not find a context among the operators, assume the top: the reduce shuffle's vectorization context.
+          vContext = reduceShuffleVectorizationContext;
+        }
       }
 
       assert vContext != null;
@@ -738,6 +770,17 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       }
 
       Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " with vectorization context key=" + vContext.getFileKey() +
+            ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString());
+        if (vectorOp instanceof VectorizationContextRegion) {
+          VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+          VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
+          LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added new vectorization context key=" + vOutContext.getFileKey() +
+              ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vOutContext.getColumnMap().toString());
+        }
+      }
       if (vectorOp instanceof VectorGroupByOperator) {
         VectorGroupByOperator groupBy = (VectorGroupByOperator) vectorOp;
         VectorGroupByDesc vectorDesc = groupBy.getConf().getVectorDesc();
diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q
index ec46630..7ebe607 100644
--- ql/src/test/queries/clientpositive/vector_char_simple.q
+++ ql/src/test/queries/clientpositive/vector_char_simple.q
@@ -41,3 +41,15 @@ order by key desc
 limit 5;
 
 drop table char_2;
+
+-- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc;
+
+explain
+insert into table char_3 select cint from alltypesorc limit 10;
+
+insert into table char_3 select cint from alltypesorc limit 10;
+
+drop table char_3;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q
index 68d6b09..9e2aae2 100644
--- ql/src/test/queries/clientpositive/vector_varchar_simple.q
+++ ql/src/test/queries/clientpositive/vector_varchar_simple.q
@@ -1,12 +1,12 @@
 SET hive.vectorized.execution.enabled=true;
 
-drop table char_2;
+drop table varchar_2;
 
-create table char_2 (
+create table varchar_2 (
   key varchar(10),
   value varchar(20)
 ) stored as orc;
 
-insert overwrite table char_2 select * from src;
+insert overwrite table varchar_2 select * from src;
 select key, value
 from src
@@ -14,13 +14,13 @@ order by key asc
 limit 5;
 
 explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5;
 
 -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5;
@@ -30,14 +30,26 @@ order by key desc
 limit 5;
 
 explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5;
 
 -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5;
 
-drop table char_2;
+drop table varchar_2;
+
+-- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc;
+
+explain
+insert into table varchar_3 select cint from alltypesorc limit 10;
+
+insert into table varchar_3 select cint from alltypesorc limit 10;
+
+drop table varchar_3;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/tez/vector_char_simple.q.out ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
index bac33ec..fe651ca 100644
--- ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
+++ ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
@@ -234,3 +234,109 @@ POSTHOOK: query: drop table char_2
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@char_2
 POSTHOOK: Output: default@char_2
+PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_3
+POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_3
+PREHOOK: query: explain
+insert into table char_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table char_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 10
+                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized
+        Reducer 2
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: CAST( _col0 AS CHAR(12) (type: char(12))
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: default.char_3
+            Execution mode: vectorized
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.char_3
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@char_3
+POSTHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@char_3
+POSTHOOK: Lineage: char_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: drop table char_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_3
+PREHOOK: Output: default@char_3
+POSTHOOK: query: drop table char_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_3
+POSTHOOK: Output: default@char_3
diff --git ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out
index f097414..f3d9147 100644
--- ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out
+++ ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out
@@ -1,31 +1,31 @@
-PREHOOK: query: drop table char_2
+PREHOOK: query: drop table varchar_2
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table char_2
+POSTHOOK: query: drop table varchar_2
 POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table char_2 (
+PREHOOK: query: create table varchar_2 (
   key varchar(10),
   value varchar(20)
 ) stored as orc
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
-PREHOOK: Output: default@char_2
-POSTHOOK: query: create table char_2 (
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: create table varchar_2 (
   key varchar(10),
   value varchar(20)
 ) stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
-POSTHOOK: Output: default@char_2
-PREHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: Output: default@varchar_2
+PREHOOK: query: insert overwrite table varchar_2 select * from src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: default@char_2
-POSTHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: insert overwrite table varchar_2 select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: default@char_2
-POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Output: default@varchar_2
+POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select key, value
 from src
@@ -46,12 +46,12 @@ POSTHOOK: Input: default@src
 10	val_10
 100	val_100
 PREHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
@@ -69,7 +69,7 @@ STAGE PLANS:
         Map 1
            Map Operator Tree:
                TableScan
-                  alias: char_2
+                  alias: varchar_2
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: varchar(10)), value (type: varchar(20))
@@ -107,19 +107,19 @@ STAGE PLANS:
 PREHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-PREHOOK: Input: default@char_2
+PREHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 POSTHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_2
+POSTHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 0	val_0
 0	val_0
@@ -146,12 +146,12 @@ POSTHOOK: Input: default@src
 97	val_97
 96	val_96
 PREHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
@@ -169,7 +169,7 @@ STAGE PLANS:
         Map 1
            Map Operator Tree:
                TableScan
-                  alias: char_2
+                  alias: varchar_2
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: varchar(10)), value (type: varchar(20))
@@ -207,30 +207,136 @@ STAGE PLANS:
 PREHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-PREHOOK: Input: default@char_2
+PREHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 POSTHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_2
+POSTHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 98	val_98
 98	val_98
 97	val_97
 97	val_97
 96	val_96
-PREHOOK: query: drop table char_2
+PREHOOK: query: drop table varchar_2
 PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@char_2
-PREHOOK: Output: default@char_2
+PREHOOK: Input: default@varchar_2
+PREHOOK: Output: default@varchar_2
-POSTHOOK: query: drop table char_2
+POSTHOOK: query: drop table varchar_2
 POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@char_2
-POSTHOOK: Output: default@char_2
+POSTHOOK: Input: default@varchar_2
+POSTHOOK: Output: default@varchar_2
+PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_3
+PREHOOK: query: explain
+insert into table varchar_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table varchar_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 10
+                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized
+        Reducer 2
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: CAST( _col0 AS varchar(25)) (type: varchar(25))
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: default.varchar_3
+            Execution mode: vectorized
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.varchar_3
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@varchar_3
+POSTHOOK: Lineage: varchar_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: drop table varchar_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_3
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: drop table varchar_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_3
+POSTHOOK: Output: default@varchar_3
diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out
index 72dc8aa..fbe1b40 100644
--- ql/src/test/results/clientpositive/vector_char_simple.q.out
+++ ql/src/test/results/clientpositive/vector_char_simple.q.out
@@ -220,3 +220,98 @@ POSTHOOK: query: drop table char_2
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@char_2
 POSTHOOK: Output: default@char_2
+PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_3
+POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_3
+PREHOOK: query: explain
+insert into table char_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table char_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 10
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int)
+          outputColumnNames: _col0
+          Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CAST( _col0 AS CHAR(12) (type: char(12))
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.char_3
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.char_3
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@char_3
+POSTHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@char_3
+POSTHOOK: Lineage: char_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: drop table char_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_3
+PREHOOK: Output: default@char_3
+POSTHOOK: query: drop table char_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_3
+POSTHOOK: Output: default@char_3
diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out
index 1c77c39..1c774af 100644
--- ql/src/test/results/clientpositive/vector_varchar_simple.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out
@@ -1,31 +1,31 @@
-PREHOOK: query: drop table char_2
+PREHOOK: query: drop table varchar_2
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table char_2
+POSTHOOK: query: drop table varchar_2
 POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table char_2 (
+PREHOOK: query: create table varchar_2 (
   key varchar(10),
   value varchar(20)
 ) stored as orc
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
-PREHOOK: Output: default@char_2
-POSTHOOK: query: create table char_2 (
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: create table varchar_2 (
   key varchar(10),
   value varchar(20)
 ) stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
-POSTHOOK: Output: default@char_2
-PREHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: Output: default@varchar_2
+PREHOOK: query: insert overwrite table varchar_2 select * from src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: default@char_2
-POSTHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: insert overwrite table varchar_2 select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: default@char_2
-POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Output: default@varchar_2
+POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select key, value
 from src
@@ -46,12 +46,12 @@ POSTHOOK: Input: default@src
 10	val_10
 100	val_100
 PREHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
@@ -64,7 +64,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: char_2
+            alias: varchar_2
             Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: varchar(10)), value (type: varchar(20))
@@ -100,19 +100,19 @@ STAGE PLANS:
 PREHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-PREHOOK: Input: default@char_2
+PREHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 POSTHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_2
+POSTHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 0	val_0
 0	val_0
@@ -139,12 +139,12 @@ POSTHOOK: Input: default@src
 97	val_97
 96	val_96
 PREHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
@@ -157,7 +157,7 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
           TableScan
-            alias: char_2
+            alias: varchar_2
             Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: varchar(10)), value (type: varchar(20))
@@ -193,30 +193,125 @@ STAGE PLANS:
 PREHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-PREHOOK: Input: default@char_2
+PREHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 POSTHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_2
+POSTHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 98	val_98
 98	val_98
 97	val_97
 97	val_97
 96	val_96
-PREHOOK: query: drop table char_2
+PREHOOK: query: drop table varchar_2
 PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@char_2
-PREHOOK: Output: default@char_2
+PREHOOK: Input: default@varchar_2
+PREHOOK: Output: default@varchar_2
-POSTHOOK: query: drop table char_2
+POSTHOOK: query: drop table varchar_2
 POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@char_2
-POSTHOOK: Output: default@char_2
+POSTHOOK: Input: default@varchar_2
+POSTHOOK: Output: default@varchar_2
+PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_3
+PREHOOK: query: explain
+insert into table varchar_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table varchar_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 10
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int)
+          outputColumnNames: _col0
+          Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CAST( _col0 AS varchar(25)) (type: varchar(25))
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.varchar_3
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.varchar_3
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@varchar_3
+POSTHOOK: Lineage: varchar_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: drop table varchar_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_3
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: drop table varchar_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_3
+POSTHOOK: Output: default@varchar_3
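
In outline, the reduce-side wiring above works like this: at plan time the Vectorizer registers the reduce shuffle's vectorization context under the "_REDUCE_SHUFFLE_" scratch-column key, and at run time ReduceRecordProcessor rebuilds a VectorizedRowBatchCtx per tag from that map instead of calling VectorizedBatchUtil.constructVectorizedRowBatch. A minimal sketch of that flow, assuming the Hive APIs used in this patch; the helper class, method, and parameters below are illustrative, not part of the patch:

    import java.util.Map;

    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.plan.ReduceWork;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

    public class ReduceShuffleBatchSetup {
      // Illustrative helper: build one VectorizedRowBatch per tag, shaped by the
      // tag's row object inspector plus the scratch column types the Vectorizer
      // stored under the "_REDUCE_SHUFFLE_" key at plan time.
      public static VectorizedRowBatch[] createBatches(ReduceWork redWork,
          StructObjectInspector[] rowObjectInspector, int maxTags) throws HiveException {
        Map<Integer, String> scratchColumnTypeMap =
            redWork.getScratchColumnVectorTypes().get("_REDUCE_SHUFFLE_");
        VectorizedRowBatch[] batches = new VectorizedRowBatch[maxTags];
        for (int tag = 0; tag < maxTags; tag++) {
          VectorizedRowBatchCtx ctx = new VectorizedRowBatchCtx();
          // The overload added by this patch: no Configuration or file key needed,
          // just the scratch column types and the shaping object inspector.
          ctx.init(scratchColumnTypeMap, rowObjectInspector[tag]);
          // Resulting batch has key/value columns followed by scratch columns,
          // so reduce-side expressions (e.g. the CAST to char/varchar) have room.
          batches[tag] = ctx.createVectorizedRowBatch();
        }
        return batches;
      }
    }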