diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 68263ea..c2400fe 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -155,12 +155,14 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   update_two_cols.q,\
   vector_cast_constant.q,\
   vector_char_4.q,\
+  vector_char_simple.q,\
   vector_data_types.q,\
   vector_decimal_aggregate.q,\
   vector_left_outer_join.q,\
   vector_mapjoin_reduce.q,\
   vector_string_concat.q,\
   vector_varchar_4.q,\
+  vector_varchar_simple.q,\
   vectorization_0.q,\
   vectorization_12.q,\
   vectorization_13.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
index 0ce371e..941f97c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
@@ -100,7 +100,8 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep
       sources[tag] = new ReduceRecordSource();
       sources[tag].init(jconf, reducer, redWork.getVectorMode(), keyTableDesc, valueTableDesc,
-          reader, tag == position, (byte) tag);
+          reader, tag == position, (byte) tag,
+          redWork.getScratchColumnVectorTypes());
       ois[tag] = sources[tag].getObjectInspector();
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index 1ca62da..8a54433 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -20,6 +20,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -28,6 +29,7 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
@@ -85,6 +87,7 @@ List<Object> row = new ArrayList<Object>(Utilities.reduceFieldNameList.size());
 
   private DataOutputBuffer buffer;
 
+  private VectorizedRowBatchCtx batchContext;
   private VectorizedRowBatch batch;
 
   // number of columns pertaining to keys in a vectorized row batch
@@ -110,7 +113,8 @@ private final boolean grouped = true;
 
   void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc,
-      TableDesc valueTableDesc, KeyValuesReader reader, boolean handleGroupKey, byte tag)
+      TableDesc valueTableDesc, KeyValuesReader reader, boolean handleGroupKey, byte tag,
+      Map<String, Map<Integer, String>> scratchColumnVectorTypes)
       throws Exception {
 
     ObjectInspector keyObjectInspector;
@@ -149,9 +153,6 @@ void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyT
       /* vectorization only works with struct object inspectors */
       valueStructInspectors = (StructObjectInspector) valueObjectInspector;
 
-      batch = VectorizedBatchUtil.constructVectorizedRowBatch(keyStructInspector,
-          valueStructInspectors);
-
       final int totalColumns = keysColumnOffset +
           valueStructInspectors.getAllStructFieldRefs().size();
       valueStringWriters =
          new ArrayList<VectorExpressionWriter>(totalColumns);
@@ -178,6 +179,12 @@ void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyT
         ois.add(field.getFieldObjectInspector());
       }
       rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois);
+
+      Map<Integer, String> reduceShuffleScratchColumnTypeMap =
+          scratchColumnVectorTypes.get("_REDUCE_SHUFFLE_");
+      batchContext = new VectorizedRowBatchCtx();
+      batchContext.init(reduceShuffleScratchColumnTypeMap, (StructObjectInspector) rowObjectInspector);
+      batch = batchContext.createVectorizedRowBatch();
     } else {
       ois.add(keyObjectInspector);
       ois.add(valueObjectInspector);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index c77d002..21c757e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -140,6 +140,20 @@ public void init(Configuration hiveConf, String fileKey,
 
   /**
+   * Initializes the VectorizedRowBatch context based on a scratch column type map and an
+   * object inspector.
+   * @param columnTypeMap
+   *          Map from scratch column index to column vector type name
+   * @param rowOI
+   *          Object inspector that shapes the column types
+   */
+  public void init(Map<Integer, String> columnTypeMap,
+      StructObjectInspector rowOI) {
+    this.columnTypeMap = columnTypeMap;
+    this.rowOI = rowOI;
+    this.rawRowOI = rowOI;
+  }
+
+  /**
    * Initializes VectorizedRowBatch context based on the
    * split and Hive configuration (Job conf with hive Plan).
    *
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index e77d41a..da71d98 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -550,7 +550,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     protected final Map<String, VectorizationContext> scratchColumnContext =
         new HashMap<String, VectorizationContext>();
 
-    protected final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
+    protected final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByOp =
         new HashMap<Operator<? extends OperatorDesc>, VectorizationContext>();
 
     protected final Set<Operator<? extends OperatorDesc>> opsDone =
@@ -589,10 +589,10 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stac
       int i= stack.size()-2;
       while (vContext == null) {
         if (i < 0) {
-          throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName()));
+          return null;
         }
         Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
-        vContext = vContextsByTSOp.get(opParent);
+        vContext = vContextsByOp.get(opParent);
         --i;
       }
       return vContext;
@@ -611,7 +611,7 @@ public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stac
       if (vectorOp instanceof VectorizationContextRegion) {
         VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
         VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
-        vContextsByTSOp.put(op, vOutContext);
+        vContextsByOp.put(op, vOutContext);
         scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
       }
     }
@@ -658,13 +658,20 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
             // vContext.setFileKey(onefile);
             scratchColumnContext.put(onefile, vContext);
+
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Vectorized MapWork operator " + op.getName() + " with vectorization context key=" + vContext.getFileKey() +
+                  ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " +
+                  vContext.getColumnMap().toString());
+            }
             break;
           }
         }
       }
-      vContextsByTSOp.put(op, vContext);
+      vContextsByOp.put(op, vContext);
     } else {
       vContext = walkStackToFindVectorizationContext(stack, op);
+      if (vContext == null) {
+        throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName()));
+      }
     }
 
     assert vContext != null;
@@ -679,7 +686,18 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       return null;
     }
 
-    doVectorize(op, vContext);
+    Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " with vectorization context key=" + vContext.getFileKey() +
+          ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString());
+      if (vectorOp instanceof VectorizationContextRegion) {
+        VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+        VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
+        LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added new vectorization context key=" + vOutContext.getFileKey() +
+            ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vOutContext.getColumnMap().toString());
+      }
+    }
 
     return null;
   }
@@ -691,6 +709,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     private int keyColCount;
     private int valueColCount;
     private Map<String, Integer> reduceColumnNameMap;
+
+    private VectorizationContext reduceShuffleVectorizationContext;
 
     private Operator<? extends OperatorDesc> rootVectorOp;
@@ -704,6 +724,7 @@ public ReduceWorkVectorizationNodeProcessor(ReduceWork rWork, int keyColCount, i
       this.keyColCount = keyColCount;
       this.valueColCount = valueColCount;
       rootVectorOp = null;
+      reduceShuffleVectorizationContext = null;
     }
 
     @Override
@@ -719,10 +740,21 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 
       if (op.getParentOperators().size() == 0) {
         vContext = getReduceVectorizationContext(reduceColumnNameMap);
-        vContextsByTSOp.put(op, vContext);
+        vContext.setFileKey("_REDUCE_SHUFFLE_");
+        scratchColumnContext.put("_REDUCE_SHUFFLE_", vContext);
+        reduceShuffleVectorizationContext = vContext;
         saveRootVectorOp = true;
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context key=" + vContext.getFileKey() +
+              ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString());
+        }
       } else {
         vContext = walkStackToFindVectorizationContext(stack, op);
+        if (vContext == null) {
+          // If we didn't find a context among the operators, assume the top of the stack:
+          // the reduce shuffle's vectorization context.
+          vContext = reduceShuffleVectorizationContext;
+        }
       }
 
       assert vContext != null;
@@ -738,6 +770,17 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       }
 
       Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " with vectorization context key=" + vContext.getFileKey() +
+            ", vectorTypes: " + vContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vContext.getColumnMap().toString());
+        if (vectorOp instanceof VectorizationContextRegion) {
+          VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+          VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
+          LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added new vectorization context key=" + vOutContext.getFileKey() +
+              ", vectorTypes: " + vOutContext.getOutputColumnTypeMap().toString() + ", columnMap: " + vOutContext.getColumnMap().toString());
+        }
+      }
       if (vectorOp instanceof VectorGroupByOperator) {
         VectorGroupByOperator groupBy = (VectorGroupByOperator) vectorOp;
         VectorGroupByDesc vectorDesc = groupBy.getConf().getVectorDesc();
@@ -791,7 +834,6 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
   boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, boolean isTez) {
     boolean ret = false;
-    LOG.info("Validating MapWork operator " + op.getType().name());
     switch (op.getType()) {
       case MAPJOIN:
         if (op instanceof MapJoinOperator) {
@@ -829,7 +871,6 @@ boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, boolean isT
   boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
     boolean ret = false;
-    LOG.info("Validating ReduceWork operator " + op.getType().name());
     switch (op.getType()) {
       case EXTRACT:
         ret = validateExtractOperator((ExtractOperator) op);
         break;
@@ -843,12 +884,7 @@ boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
         }
         break;
       case GROUPBY:
-        if (HiveConf.getBoolVar(physicalContext.getConf(),
-            HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) {
-          ret = validateGroupByOperator((GroupByOperator) op, true, true);
-        } else {
-          ret = false;
-        }
+        ret = validateGroupByOperator((GroupByOperator) op, true, true);
         break;
       case FILTER:
         ret = validateFilterOperator((FilterOperator) op);
@@ -1080,11 +1116,11 @@ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode
       VectorizationContext vc = new ValidatorVectorizationContext();
       if (vc.getVectorExpression(desc, mode) == null) {
         // TODO: this cannot happen - VectorizationContext throws in such cases.
-        LOG.debug("getVectorExpression returned null");
+        LOG.info("getVectorExpression returned null");
         return false;
       }
     } catch (Exception e) {
-      LOG.debug("Failed to vectorize", e);
+      LOG.info("Failed to vectorize", e);
       return false;
     }
     return true;
@@ -1107,19 +1143,19 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc
     if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
       return false;
     }
-    if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
-      return false;
+    if (aggDesc.getParameters() != null) {
+      return validateExprNodeDesc(aggDesc.getParameters());
     }
     // See if we can vectorize the aggregation.
     try {
       VectorizationContext vc = new ValidatorVectorizationContext();
       if (vc.getAggregatorExpression(aggDesc, isReduce) == null) {
         // TODO: this cannot happen - VectorizationContext throws in such cases.
- LOG.debug("getAggregatorExpression returned null"); + LOG.info("getAggregatorExpression returned null"); return false; } } catch (Exception e) { - LOG.debug("Failed to vectorize", e); + LOG.info("Failed to vectorize", e); return false; } return true; @@ -1205,7 +1241,6 @@ private void fixupParentChildOperators(Operator op, Oper case REDUCESINK: case LIMIT: case EXTRACT: - case EVENT: vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext); break; default: diff --git ql/src/test/queries/clientpositive/vector_char_4.q ql/src/test/queries/clientpositive/vector_char_4.q index e825ad1..25de284 100644 --- ql/src/test/queries/clientpositive/vector_char_4.q +++ ql/src/test/queries/clientpositive/vector_char_4.q @@ -46,6 +46,6 @@ create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), c explain insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; --- insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; --- select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc; \ No newline at end of file +select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q index ec46630..858fe16 100644 --- ql/src/test/queries/clientpositive/vector_char_simple.q +++ ql/src/test/queries/clientpositive/vector_char_simple.q @@ -41,3 +41,16 @@ order by key desc limit 5; drop table char_2; + + +-- Implicit conversion. Occurs in reduce-side under Tez. +create table char_3 ( + field char(12) +) stored as orc; + +explain +insert into table char_3 select cint from alltypesorc limit 10; + +insert into table char_3 select cint from alltypesorc limit 10; + +drop table char_3; diff --git ql/src/test/queries/clientpositive/vector_varchar_4.q ql/src/test/queries/clientpositive/vector_varchar_4.q index c683770..4790ff0 100644 --- ql/src/test/queries/clientpositive/vector_varchar_4.q +++ ql/src/test/queries/clientpositive/vector_varchar_4.q @@ -46,6 +46,6 @@ create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi va explain insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; --- insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; --- select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc; \ No newline at end of file +select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q index 68d6b09..1cd30ee 100644 --- ql/src/test/queries/clientpositive/vector_varchar_simple.q +++ ql/src/test/queries/clientpositive/vector_varchar_simple.q @@ -1,12 +1,12 @@ SET hive.vectorized.execution.enabled=true; -drop table char_2; +drop table varchar_2; -create table char_2 ( +create table varchar_2 ( key varchar(10), value varchar(20) ) stored as orc; -insert overwrite table char_2 select * from src; +insert overwrite table varchar_2 select * from src; select key, value from src @@ -14,13 +14,13 @@ order 
 by key asc
 limit 5;
 
 explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5;
 
 -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5;
@@ -30,14 +30,26 @@ order by key desc
 limit 5;
 
 explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5;
 
 -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5;
 
-drop table char_2;
+drop table varchar_2;
+
+-- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc;
+
+explain
+insert into table varchar_3 select cint from alltypesorc limit 10;
+
+insert into table varchar_3 select cint from alltypesorc limit 10;
+
+drop table varchar_3;
diff --git ql/src/test/results/clientpositive/tez/vector_char_4.q.out ql/src/test/results/clientpositive/tez/vector_char_4.q.out
index 4f46d06..649a0cf 100644
--- ql/src/test/results/clientpositive/tez/vector_char_4.q.out
+++ ql/src/test/results/clientpositive/tez/vector_char_4.q.out
@@ -173,3 +173,53 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+PREHOOK: query: insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab10korc
+PREHOOK: Output: default@char_lazy_binary_columnar
+POSTHOOK: query: insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab10korc
+POSTHOOK: Output: default@char_lazy_binary_columnar
+POSTHOOK: Lineage: char_lazy_binary_columnar.cb EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.cd EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.cf EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.ci EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.cs EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.csi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.ct EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:t, type:tinyint, comment:null), ]
+PREHOOK: query: select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_lazy_binary_columnar
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_lazy_binary_columnar
+#### A masked pattern was here ####
+321
+325
+328
+339
+350
+352
+352
+353
+355
+355
+361
+362
+362
+366
+367
+371
+372
+372
+379
+379
+381
+382
+395
+404
+406
+410
+501
diff --git ql/src/test/results/clientpositive/tez/vector_char_simple.q.out ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
index bac33ec..fe651ca 100644
--- ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
+++ ql/src/test/results/clientpositive/tez/vector_char_simple.q.out
@@ -234,3 +234,109 @@ POSTHOOK: query: drop table char_2
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@char_2
 POSTHOOK: Output: default@char_2
+PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_3
+POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_3
+PREHOOK: query: explain
+insert into table char_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table char_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 10
+                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order:
+                        Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized
+        Reducer 2
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: CAST( _col0 AS CHAR(12) (type: char(12))
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: default.char_3
+            Execution mode: vectorized
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.char_3
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@char_3
+POSTHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@char_3
+POSTHOOK: Lineage: char_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: drop table char_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_3
+PREHOOK: Output: default@char_3
+POSTHOOK: query: drop table char_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_3
+POSTHOOK: Output: default@char_3
diff --git ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out
index 61eabda..9c7459f 100644
--- ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out
+++ ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out
@@ -173,3 +173,53 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+PREHOOK: query: insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab10korc
+PREHOOK: Output: default@varchar_lazy_binary_columnar
+POSTHOOK: query: insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab10korc
+POSTHOOK: Output: default@varchar_lazy_binary_columnar
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vb EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vd EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vf EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vs EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vsi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vt EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:t, type:tinyint, comment:null), ]
+PREHOOK: query: select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_lazy_binary_columnar
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_lazy_binary_columnar
+#### A masked pattern was here ####
+321
+325
+328
+339
+350
+352
+352
+353
+355
+355
+361
+362
+362
+366
+367
+371
+372
+372
+379
+379
+381
+382
+395
+404
+406
+410
+501
diff --git ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out
index f097414..f3d9147 100644
--- ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out
+++ ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out
@@ -1,31 +1,31 @@
-PREHOOK: query: drop table char_2
+PREHOOK: query: drop table varchar_2
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table char_2
+POSTHOOK: query: drop table varchar_2
 POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table char_2 (
+PREHOOK: query: create table varchar_2 (
 key varchar(10),
 value varchar(20)
 ) stored as orc
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
-PREHOOK: Output: default@char_2
-POSTHOOK: query: create table char_2 (
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: create table varchar_2 (
 key varchar(10),
 value varchar(20)
 ) stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
-POSTHOOK: Output: default@char_2
-PREHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: Output: default@varchar_2
+PREHOOK: query: insert overwrite table varchar_2 select * from src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: default@char_2
-POSTHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: insert overwrite table varchar_2 select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: default@char_2
-POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Output: default@varchar_2
+POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select key, value
 from src
 order by key asc
@@ -46,12 +46,12 @@ POSTHOOK: Input: default@src
 10	val_10
 100	val_100
 PREHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
@@ -69,7 +69,7 @@ STAGE PLANS:
         Map 1
             Map Operator Tree:
                 TableScan
-                  alias: char_2
+                  alias: varchar_2
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: varchar(10)), value (type: varchar(20))
@@ -107,19 +107,19 @@ STAGE PLANS:
 
 PREHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-PREHOOK: Input: default@char_2
+PREHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 POSTHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_2
+POSTHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 0	val_0
 0	val_0
@@ -146,12 +146,12 @@ POSTHOOK: Input: default@src
 97	val_97
 96	val_96
 PREHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
@@ -169,7 +169,7 @@ STAGE PLANS:
         Map 1
            Map Operator Tree:
                 TableScan
-                  alias: char_2
+                  alias: varchar_2
                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: varchar(10)), value (type: varchar(20))
@@ -207,30 +207,136 @@ STAGE PLANS:
 
 PREHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-PREHOOK: Input: default@char_2
 #### A masked pattern was here ####
 POSTHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_2
+POSTHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 98	val_98
 98	val_98
 97	val_97
 97	val_97
 96	val_96
-PREHOOK: query: drop table char_2
+PREHOOK: query: drop table varchar_2
 PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@char_2
-PREHOOK: Output: default@char_2
-POSTHOOK: query: drop table char_2
+PREHOOK: Input: default@varchar_2
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: drop table varchar_2
 POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@char_2
-POSTHOOK: Output: default@char_2
+POSTHOOK: Input: default@varchar_2
+POSTHOOK: Output: default@varchar_2
+PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_3
+PREHOOK: query: explain
+insert into table varchar_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table varchar_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 10
+                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order:
+                        Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
+            Execution mode: vectorized
+        Reducer 2
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: CAST( _col0 AS varchar(25)) (type: varchar(25))
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: default.varchar_3
+            Execution mode: vectorized
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.varchar_3
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@varchar_3
+POSTHOOK: Lineage: varchar_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: drop table varchar_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_3
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: drop table varchar_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_3
+POSTHOOK: Output: default@varchar_3
diff --git ql/src/test/results/clientpositive/vector_char_4.q.out ql/src/test/results/clientpositive/vector_char_4.q.out
index e3b555e..81e5a28 100644
--- ql/src/test/results/clientpositive/vector_char_4.q.out
+++ ql/src/test/results/clientpositive/vector_char_4.q.out
@@ -200,3 +200,53 @@ STAGE PLANS:
       hdfs directory: true
 #### A masked pattern was here ####
 
+PREHOOK: query: insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab10korc
+PREHOOK: Output: default@char_lazy_binary_columnar
+POSTHOOK: query: insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab10korc
+POSTHOOK: Output: default@char_lazy_binary_columnar
+POSTHOOK: Lineage: char_lazy_binary_columnar.cb EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.cd EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.cf EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.ci EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.cs EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.csi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: char_lazy_binary_columnar.ct EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:t, type:tinyint, comment:null), ]
+PREHOOK: query: select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_lazy_binary_columnar
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_lazy_binary_columnar
+#### A masked pattern was here ####
+321
+325
+328
+339
+350
+352
+352
+353
+355
+355
+361
+362
+362
+366
+367
+371
+372
+372
+379
+379
+381
+382
+395
+404
+406
+410
+501
diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out
index 72dc8aa..fbe1b40 100644
--- ql/src/test/results/clientpositive/vector_char_simple.q.out
+++ ql/src/test/results/clientpositive/vector_char_simple.q.out
@@ -220,3 +220,98 @@ POSTHOOK: query: drop table char_2
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@char_2
 POSTHOOK: Output: default@char_2
+PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_3
+POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table char_3 (
+  field char(12)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_3
+PREHOOK: query: explain
+insert into table char_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table char_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 10
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int)
+          outputColumnNames: _col0
+          Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CAST( _col0 AS CHAR(12) (type: char(12))
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.char_3
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.char_3
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@char_3
+POSTHOOK: query: insert into table char_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@char_3
+POSTHOOK: Lineage: char_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: drop table char_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_3
+PREHOOK: Output: default@char_3
+POSTHOOK: query: drop table char_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_3
+POSTHOOK: Output: default@char_3
diff --git ql/src/test/results/clientpositive/vector_varchar_4.q.out ql/src/test/results/clientpositive/vector_varchar_4.q.out
index a3b8707..aa9fa1b 100644
--- ql/src/test/results/clientpositive/vector_varchar_4.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_4.q.out
@@ -200,3 +200,53 @@ STAGE PLANS:
       hdfs directory: true
 #### A masked pattern was here ####
 
+PREHOOK: query: insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab10korc
+PREHOOK: Output: default@varchar_lazy_binary_columnar
+POSTHOOK: query: insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab10korc
+POSTHOOK: Output: default@varchar_lazy_binary_columnar
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vb EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vd EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vf EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vs EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vsi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: varchar_lazy_binary_columnar.vt EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:t, type:tinyint, comment:null), ]
+PREHOOK: query: select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_lazy_binary_columnar
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_lazy_binary_columnar
+#### A masked pattern was here ####
+321
+325
+328
+339
+350
+352
+352
+353
+355
+355
+361
+362
+362
+366
+367
+371
+372
+372
+379
+379
+381
+382
+395
+404
+406
+410
+501
diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out
index 1c77c39..1c774af 100644
--- ql/src/test/results/clientpositive/vector_varchar_simple.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out
@@ -1,31 +1,31 @@
-PREHOOK: query: drop table char_2
+PREHOOK: query: drop table varchar_2
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table char_2
+POSTHOOK: query: drop table varchar_2
 POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table char_2 (
+PREHOOK: query: create table varchar_2 (
 key varchar(10),
 value varchar(20)
 ) stored as orc
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
-PREHOOK: Output: default@char_2
-POSTHOOK: query: create table char_2 (
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: create table varchar_2 (
 key varchar(10),
 value varchar(20)
 ) stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
-POSTHOOK: Output: default@char_2
-PREHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: Output: default@varchar_2
+PREHOOK: query: insert overwrite table varchar_2 select * from src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: default@char_2
-POSTHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: insert overwrite table varchar_2 select * from src
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: default@char_2
-POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Output: default@varchar_2
+POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select key, value
 from src
 order by key asc
@@ -46,12 +46,12 @@ POSTHOOK: Input: default@src
 10	val_10
 100	val_100
 PREHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
@@ -64,7 +64,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: char_2
+            alias: varchar_2
             Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: varchar(10)), value (type: varchar(20))
@@ -100,19 +100,19 @@ STAGE PLANS:
 
 PREHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-PREHOOK: Input: default@char_2
+PREHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 POSTHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_2
+POSTHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 0	val_0
 0	val_0
@@ -139,12 +139,12 @@ POSTHOOK: Input: default@src
 97	val_97
 96	val_96
 PREHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
@@ -157,7 +157,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-            alias: char_2
+            alias: varchar_2
             Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: varchar(10)), value (type: varchar(20))
@@ -193,30 +193,125 @@ STAGE PLANS:
 
 PREHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-PREHOOK: Input: default@char_2
+PREHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 POSTHOOK: query: -- should match the query from src
 select key, value
-from char_2
+from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@char_2
+POSTHOOK: Input: default@varchar_2
 #### A masked pattern was here ####
 98	val_98
 98	val_98
 97	val_97
 97	val_97
 96	val_96
-PREHOOK: query: drop table char_2
+PREHOOK: query: drop table varchar_2
 PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@char_2
-PREHOOK: Output: default@char_2
-POSTHOOK: query: drop table char_2
+PREHOOK: Input: default@varchar_2
+PREHOOK: Output: default@varchar_2
+POSTHOOK: query: drop table varchar_2
 POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@char_2
-POSTHOOK: Output: default@char_2
+POSTHOOK: Input: default@varchar_2
+POSTHOOK: Output: default@varchar_2
+PREHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: -- Implicit conversion. Occurs in reduce-side under Tez.
+create table varchar_3 (
+  field varchar(25)
+) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_3
+PREHOOK: query: explain
+insert into table varchar_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert into table varchar_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 10
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int)
+          outputColumnNames: _col0
+          Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CAST( _col0 AS varchar(25)) (type: varchar(25))
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                    name: default.varchar_3
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.varchar_3
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@varchar_3
+POSTHOOK: Lineage: varchar_3.field EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: drop table varchar_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_3
+PREHOOK: Output: default@varchar_3
+POSTHOOK: query: drop table varchar_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_3
+POSTHOOK: Output: default@varchar_3