diff --git data/files/tjoin1.txt data/files/tjoin1.txt
new file mode 100644
index 0000000..897e0c5
--- /dev/null
+++ data/files/tjoin1.txt
@@ -0,0 +1,3 @@
+0|10|15
+1|20|25
+2|\N|50
\ No newline at end of file
diff --git data/files/tjoin2.txt data/files/tjoin2.txt
new file mode 100644
index 0000000..24820e9
--- /dev/null
+++ data/files/tjoin2.txt
@@ -0,0 +1,4 @@
+0|10|BB
+1|15|DD
+2|\N|EE
+3|10|FF
\ No newline at end of file
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 61d376a..6445b09 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -211,6 +211,8 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_groupby_3.q,\
   vector_groupby_reduce.q,\
   vector_left_outer_join.q,\
+  vector_left_outer_join2.q,\
+  vector_left_outer_join3.q,\
   vector_mapjoin_reduce.q,\
   vector_non_string_partition.q,\
   vector_orderby_5.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
index b82fcb2..f7a7893 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
@@ -151,20 +151,25 @@ public OpTuple(Class<T> descClass, Class<? extends Operator<?>> opClass) {
   }
 
+  public static <T extends OperatorDesc> Operator<T> getVectorOperator(
+      Class<? extends Operator<?>> opClass, T conf, VectorizationContext vContext) throws HiveException {
+    try {
+      Operator<T> op = (Operator<T>) opClass.getDeclaredConstructor(
+          VectorizationContext.class, OperatorDesc.class).newInstance(
+          vContext, conf);
+      return op;
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw new HiveException(e);
+    }
+  }
+
   public static <T extends OperatorDesc> Operator<T> getVectorOperator(T conf,
       VectorizationContext vContext) throws HiveException {
     Class<T> descClass = (Class<T>) conf.getClass();
     for (OpTuple o : vectorOpvec) {
       if (o.descClass == descClass) {
-        try {
-          Operator<T> op = (Operator<T>) o.opClass.getDeclaredConstructor(
-              VectorizationContext.class, OperatorDesc.class).newInstance(
-              vContext, conf);
-          return op;
-        } catch (Exception e) {
-          e.printStackTrace();
-          throw new HiveException(e);
-        }
+        return getVectorOperator(o.opClass, conf, vContext);
       }
     }
     throw new HiveException("No vector operator for descriptor class "
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
index 858604c..2dee46d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
@@ -86,7 +86,8 @@ public void processOp(Object data, int tag) throws HiveException {
     }
     for (int i = 0; i < vrg.projectionSize; i++) {
       ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]];
-      singleRow[i] = valueWriters[i].writeValue(vectorColumn, batchIndex);
+      int adjustedIndex = (vectorColumn.isRepeating ? 0 : batchIndex);
+      singleRow[i] = valueWriters[i].writeValue(vectorColumn, adjustedIndex);
     }
     return singleRow;
   }
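The VectorFileSinkOperator change above exists because a ColumnVector with isRepeating set keeps its single value (and null flag) in slot 0 only; indexing it with the logical batchIndex reads stale data. A minimal stand-alone illustration of that read pattern, assuming Hive's vectorization classes on the classpath (readLong and the class around it are hypothetical helpers, not patch code):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class RepeatingColumnExample {
      // Read the long value of 'col' at logical row 'batchIndex', or null for a NULL entry.
      static Long readLong(LongColumnVector col, int batchIndex) {
        // A repeating vector keeps its single value (and null flag) in slot 0.
        int adjustedIndex = col.isRepeating ? 0 : batchIndex;
        if (!col.noNulls && col.isNull[adjustedIndex]) {
          return null;
        }
        return col.vector[adjustedIndex];
      }

      public static void main(String[] args) {
        LongColumnVector col = new LongColumnVector(1024);
        col.isRepeating = true;   // every row logically holds the same value
        col.noNulls = true;
        col.vector[0] = 42;
        System.out.println(readLong(col, 500));   // prints 42, not garbage from slot 500
      }
    }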
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOuterFilteredOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOuterFilteredOperator.java
new file mode 100644
index 0000000..86ea9a3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOuterFilteredOperator.java
@@ -0,0 +1,211 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * The vectorized pass-through version of the MapJoinOperator.
+ */
+public class VectorMapJoinOuterFilteredOperator extends MapJoinOperator implements VectorizationContextRegion {
+
+  private static final Log LOG = LogFactory.getLog(
+      VectorMapJoinOuterFilteredOperator.class.getName());
+
+  /**
+   *
+   */
+  private static final long serialVersionUID = 1L;
+
+  private VectorExpression[] bigTableFilterExpressions;
+
+  private VectorizationContext vOutContext;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorExpressionWriter[] rowWriters;
+
+  private transient Object[] singleRow;
+
+  private transient VectorizedRowBatch outputBatch;
+
+  private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
+
+  private transient VectorizedRowBatchCtx vrbCtx = null;
+
+  public VectorMapJoinOuterFilteredOperator() {
+    super();
+  }
+
+  public VectorMapJoinOuterFilteredOperator(VectorizationContext vContext, OperatorDesc conf)
+      throws HiveException {
+    this();
+
+    MapJoinDesc desc = (MapJoinDesc) conf;
+    this.conf = desc;
+
+    if (desc.isNoOuterJoin()) {
+      // It is only valid to pre-filter for INNER JOIN.  OUTER JOIN requires post-ON condition
+      // evaluation of filters.  They will be done by our super class MapJoinOperator.
+      Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
+      bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get((byte) desc.getPosBigTable()),
+          VectorExpressionDescriptor.Mode.FILTER);
+    }
+
+    // We are making a new output vectorized row batch.
+    vOutContext = new VectorizationContext(desc.getOutputColumnNames());
+    vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + desc.getBigTableAlias());
+  }
+
+  @Override
+  public void initializeOp(Configuration hconf) throws HiveException {
+
+    // Use a final variable to parameterize the processVectorInspector closure.
+    final int posBigTable = conf.getPosBigTable();
+
+    // We need an input object inspector that is for the row we will extract out of the
+    // big table vectorized row batch, not, for example, an original inspector for an
+    // ORC table, etc.
+    VectorExpressionWriterFactory.processVectorInspector(
+        (StructObjectInspector) inputObjInspectors[posBigTable],
+        new VectorExpressionWriterFactory.SingleOIDClosure() {
+          @Override
+          public void assign(VectorExpressionWriter[] writers,
+              ObjectInspector objectInspector) {
+            rowWriters = writers;
+            inputObjInspectors[posBigTable] = objectInspector;
+          }
+        }
+        );
+
+    singleRow = new Object[rowWriters.length];
+
+    // Call MapJoinOperator with new input inspector.
+    super.initializeOp(hconf);
+
+    vrbCtx = new VectorizedRowBatchCtx();
+    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
+
+    outputBatch = vrbCtx.createVectorizedRowBatch();
+
+    outputVectorAssigners = new HashMap<ObjectInspector, VectorColumnAssign[]>();
+  }
+
+  /**
+   * 'forwards' the (row-mode) record into the (vectorized) output batch
+   */
+  @Override
+  protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
+    Object[] values = (Object[]) row;
+    VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
+    if (null == vcas) {
+      vcas = VectorColumnAssignFactory.buildAssigners(
+          outputBatch, outputOI, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
+      outputVectorAssigners.put(outputOI, vcas);
+    }
+    for (int i = 0; i < values.length; ++i) {
+      vcas[i].assignObjectValue(values[i], outputBatch.size);
+    }
+    ++outputBatch.size;
+    if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
+      flushOutput();
+    }
+  }
+
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+
+    VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
+
+    if (conf.isNoOuterJoin() && bigTableFilterExpressions != null) {
+      // It is only valid to pre-filter for INNER JOIN.  OUTER JOIN requires post-ON condition
+      // evaluation of filters.  They will be done by our super class MapJoinOperator.
+      for (VectorExpression ve : bigTableFilterExpressions) {
+        ve.evaluate(inBatch);
+      }
+    }
+
+    // Process each row of the batch through the row-mode MapJoinOperator.
+    for (int i = 0; i < inBatch.size; i++) {
+      Object rowFromBatch = getRowObject(inBatch, i);
+      super.processOp(rowFromBatch, tag);
+    }
+  }
+
+  @Override
+  public void closeOp(boolean aborted) throws HiveException {
+    // Make our super class finish before we flush our output batch.
+    super.closeOp(aborted);
+
+    if (!aborted && 0 < outputBatch.size) {
+      flushOutput();
+    }
+  }
+
+  private void flushOutput() throws HiveException {
+    forward(outputBatch, null);
+    outputBatch.reset();
+  }
+
+  private Object[] getRowObject(VectorizedRowBatch vrg, int rowIndex)
+      throws HiveException {
+    int batchIndex = rowIndex;
+    if (vrg.selectedInUse) {
+      batchIndex = vrg.selected[rowIndex];
+    }
+
+    for (int i = 0; i < vrg.projectionSize; i++) {
+      ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]];
+      if (vectorColumn != null) {
+        int adjustedIndex = (vectorColumn.isRepeating ? 0 : batchIndex);
+        singleRow[i] = rowWriters[i].writeValue(vectorColumn, adjustedIndex);
+      } else {
+        // Some columns from tables are not used.
+        singleRow[i] = null;
+      }
+    }
+    return singleRow;
+  }
+
+  @Override
+  public VectorizationContext getOuputVectorizationContext() {
+    return vOutContext;
+  }
+}
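Before the SMB twin below, it helps to pin down the pattern VectorMapJoinOuterFilteredOperator implements: vectorized batches come in, each row is unpacked and pushed through the unmodified row-mode join, and the forwarded rows are re-batched into a fresh output batch that is flushed when full. A self-contained sketch of that control flow (all names here are illustrative stand-ins, not Hive APIs):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.function.Function;

    // Illustrative model of the pass-through operator: vector batches in,
    // row-mode processing in the middle, vector batches out.
    public class PassThroughModel {
      static final int DEFAULT_SIZE = 1024;   // mirrors VectorizedRowBatch.DEFAULT_SIZE

      private final List<Object[]> outputBatch = new ArrayList<>(); // stand-in for the output VectorizedRowBatch
      private final Function<Object[], Object[]> rowModeJoin;       // stand-in for the row-mode join logic

      PassThroughModel(Function<Object[], Object[]> rowModeJoin) {
        this.rowModeJoin = rowModeJoin;
      }

      // Stand-in for processOp(batch, tag): unpack one row per index.
      void processBatch(Object[][] inBatch) {
        for (Object[] row : inBatch) {
          internalForward(rowModeJoin.apply(row));
        }
      }

      // Stand-in for internalForward(): buffer the row-mode result, flush on a full batch.
      private void internalForward(Object[] resultRow) {
        outputBatch.add(resultRow);
        if (outputBatch.size() == DEFAULT_SIZE) {
          flushOutput();
        }
      }

      private void flushOutput() {
        // A real operator forwards the batch downstream; here we just clear it.
        outputBatch.clear();
      }
    }

In the real operator, getRowObject() plays the row-extraction role and internalForward() the buffering role, with VectorExpressionWriters and VectorColumnAssigns doing the type-specific copying.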
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOuterFilteredOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOuterFilteredOperator.java
new file mode 100644
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOuterFilteredOperator.java
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * The vectorized pass-through version of the SMBMapJoinOperator.
+ */
+public class VectorSMBMapJoinOuterFilteredOperator extends SMBMapJoinOperator implements VectorizationContextRegion {
+
+  private static final Log LOG = LogFactory.getLog(
+      VectorSMBMapJoinOuterFilteredOperator.class.getName());
+
+  private static final long serialVersionUID = 1L;
+
+  private VectorExpression[] bigTableFilterExpressions;
+
+  private VectorizationContext vOutContext;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorExpressionWriter[] rowWriters;
+
+  private transient Object[] singleRow;
+
+  private transient VectorizedRowBatch outputBatch;
+
+  private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
+
+  private transient VectorizedRowBatchCtx vrbCtx = null;
+
+  public VectorSMBMapJoinOuterFilteredOperator() {
+    super();
+  }
+
+  public VectorSMBMapJoinOuterFilteredOperator(VectorizationContext vContext, OperatorDesc conf)
+      throws HiveException {
+    this();
+    SMBJoinDesc desc = (SMBJoinDesc) conf;
+    this.conf = desc;
+
+    if (desc.isNoOuterJoin()) {
+      // It is only valid to pre-filter for INNER JOIN.  OUTER JOIN requires post-ON condition
+      // evaluation of filters.  They will be done by our super class SMBMapJoinOperator.
+      Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
+      bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get((byte) desc.getPosBigTable()),
+          VectorExpressionDescriptor.Mode.FILTER);
+    }
+
+    // We are making a new output vectorized row batch.
+    vOutContext = new VectorizationContext(desc.getOutputColumnNames());
+    vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    // Use a final variable to parameterize the processVectorInspector closure.
+    final int posBigTable = conf.getPosBigTable();
+
+    // We need an input object inspector that is for the row we will extract out of the
+    // big table vectorized row batch, not, for example, an original inspector for an
+    // ORC table, etc.
+    VectorExpressionWriterFactory.processVectorInspector(
+        (StructObjectInspector) inputObjInspectors[posBigTable],
+        new VectorExpressionWriterFactory.SingleOIDClosure() {
+          @Override
+          public void assign(VectorExpressionWriter[] writers,
+              ObjectInspector objectInspector) {
+            rowWriters = writers;
+            inputObjInspectors[posBigTable] = objectInspector;
+          }
+        }
+        );
+
+    singleRow = new Object[rowWriters.length];
+
+    // Call SMBMapJoinOperator with the new input inspector.
+    super.initializeOp(hconf);
+
+    vrbCtx = new VectorizedRowBatchCtx();
+    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
+
+    outputBatch = vrbCtx.createVectorizedRowBatch();
+
+    outputVectorAssigners = new HashMap<ObjectInspector, VectorColumnAssign[]>();
+  }
+
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    byte alias = (byte) tag;
+
+    if (alias != this.posBigTable) {
+      super.processOp(row, tag);
+    } else {
+
+      VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
+
+      if (conf.isNoOuterJoin() && bigTableFilterExpressions != null) {
+        // It is only valid to pre-filter for INNER JOIN.  OUTER JOIN requires post-ON condition
+        // evaluation of filters.  They will be done by our super class SMBMapJoinOperator.
+        for (VectorExpression ve : bigTableFilterExpressions) {
+          ve.evaluate(inBatch);
+        }
+      }
+
+      for (int i = 0; i < inBatch.size; i++) {
+        Object rowFromBatch = getRowObject(inBatch, i);
+        super.processOp(rowFromBatch, tag);
+      }
+    }
+  }
+
+  @Override
+  public void closeOp(boolean aborted) throws HiveException {
+    // Make our super class finish before we flush our output batch.
+    super.closeOp(aborted);
+
+    if (!aborted && 0 < outputBatch.size) {
+      flushOutput();
+    }
+  }
+
+  @Override
+  protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
+    Object[] values = (Object[]) row;
+    VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
+    if (null == vcas) {
+      vcas = VectorColumnAssignFactory.buildAssigners(
+          outputBatch, outputOI, vOutContext.getProjectionColumnMap(), conf.getOutputColumnNames());
+      outputVectorAssigners.put(outputOI, vcas);
+    }
+    for (int i = 0; i < values.length; ++i) {
+      vcas[i].assignObjectValue(values[i], outputBatch.size);
+    }
+    ++outputBatch.size;
+    if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
+      flushOutput();
+    }
+  }
+
+  private void flushOutput() throws HiveException {
+    forward(outputBatch, null);
+    outputBatch.reset();
+  }
+
+  private Object[] getRowObject(VectorizedRowBatch vrg, int rowIndex)
+      throws HiveException {
+    int batchIndex = rowIndex;
+    if (vrg.selectedInUse) {
+      batchIndex = vrg.selected[rowIndex];
+    }
+
+    for (int i = 0; i < vrg.projectionSize; i++) {
+      ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]];
+      if (vectorColumn != null) {
+        int adjustedIndex = (vectorColumn.isRepeating ? 0 : batchIndex);
+        singleRow[i] = rowWriters[i].writeValue(vectorColumn, adjustedIndex);
+      } else {
+        // Some columns from tables are not used.
+        singleRow[i] = null;
+      }
+    }
+    return singleRow;
+  }
+
+  @Override
+  public VectorizationContext getOuputVectorizationContext() {
+    return vOutContext;
+  }
+}
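Both pass-through classes evaluate bigTableFilterExpressions ahead of the join only when isNoOuterJoin() holds; for an outer join the ON-clause filter has to run after match processing, which the row-mode superclass does. A FILTER-mode VectorExpression in Hive narrows a batch in place by rewriting its selected index array and size rather than copying rows; a simplified stand-alone model of that contract (illustrative names, not the Hive API):

    // Simplified model of FILTER-mode evaluation over a vectorized batch:
    // surviving row indices are compacted into 'selected' and 'size' shrinks.
    public class FilterModel {
      int size;                 // number of logical rows in the batch
      int[] selected;           // logical row -> physical slot
      boolean selectedInUse;

      FilterModel(int size) {
        this.size = size;
        this.selected = new int[size];
        for (int i = 0; i < size; i++) {
          selected[i] = i;
        }
      }

      // Keep only rows whose column value exceeds the threshold (e.g. c2 > 15).
      void filterGreaterThan(long[] column, long threshold) {
        int newSize = 0;
        for (int j = 0; j < size; j++) {
          int i = selectedInUse ? selected[j] : j;
          if (column[i] > threshold) {
            selected[newSize++] = i;
          }
        }
        size = newSize;
        selectedInUse = true;
      }
    }

After such an evaluation, getRowObject() visits only the surviving rows, because it indexes through selected[] whenever selectedInUse is set.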
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index aca4273..f772e72 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -43,6 +43,10 @@ import org.apache.hadoop.hive.ql.exec.tez.TezTask;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
@@ -1296,6 +1300,28 @@ private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
     switch (op.getType()) {
       case MAPJOIN:
+        {
+          MapJoinDesc mapJoinDesc = (MapJoinDesc) op.getConf();
+          List<ExprNodeDesc> bigTableFilters = mapJoinDesc.getFilters().get((byte) mapJoinDesc.getPosBigTable());
+          boolean isOuterAndFiltered = (!mapJoinDesc.isNoOuterJoin() && bigTableFilters.size() > 0);
+          Class<? extends Operator<?>> opClass = null;
+          if (op instanceof MapJoinOperator) {
+            if (!isOuterAndFiltered) {
+              opClass = VectorMapJoinOperator.class;
+            } else {
+              opClass = VectorMapJoinOuterFilteredOperator.class;
+            }
+          } else if (op instanceof SMBMapJoinOperator) {
+            if (!isOuterAndFiltered) {
+              opClass = VectorSMBMapJoinOperator.class;
+            } else {
+              opClass = VectorSMBMapJoinOuterFilteredOperator.class;
+            }
+          }
+          vectorOp = OperatorFactory.getVectorOperator(opClass, op.getConf(), vContext);
+          LOG.info("Vectorizer vectorizeOperator map join class " +
+              vectorOp.getClass().getSimpleName());
+        }
+        break;
       case GROUPBY:
       case FILTER:
       case SELECT:
diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q
new file mode 100644
index 0000000..db820e2
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -0,0 +1,26 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+
+drop table if exists TJOIN1;
+drop table if exists TJOIN2;
+create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc;
+create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc;
+create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ;
+create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ;
+LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE;
+LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE;
+INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE;
+INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE;
+
+set hive.vectorized.execution.enabled=false;
+explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
+
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
+
+set hive.vectorized.execution.enabled=true;
+explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
+
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
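For the data in tjoin1.txt and tjoin2.txt, the ON-clause filter tjoin1.c2 > 15 means no left row can find a match: row 0 fails the filter, row 1 has no matching c1, and row 2 has a NULL c1. Every c2j2 in the .q.out files below is therefore NULL. A small stand-alone cross-check of that expectation (hardcoded copies of the two data files; not part of the patch):

    // Cross-check the expected LEFT OUTER JOIN result for the tjoin data.
    public class TJoinCheck {
      public static void main(String[] args) {
        Integer[][] tjoin1 = { {0, 10, 15}, {1, 20, 25}, {2, null, 50} };   // rnum, c1, c2
        Object[][] tjoin2 = { {0, 10, "BB"}, {1, 15, "DD"}, {2, null, "EE"}, {3, 10, "FF"} };

        for (Integer[] left : tjoin1) {
          boolean matched = false;
          for (Object[] right : tjoin2) {
            // ON ( tjoin1.c1 = tjoin2.c1 AND tjoin1.c2 > 15 ); NULLs never compare equal.
            if (left[1] != null && left[1].equals(right[1]) && left[2] != null && left[2] > 15) {
              System.out.println(left[0] + "\t" + left[1] + "\t" + left[2] + "\t" + right[2]);
              matched = true;
            }
          }
          if (!matched) {   // LEFT OUTER: emit the left row with a NULL right side
            System.out.println(left[0] + "\t" + left[1] + "\t" + left[2] + "\tNULL");
          }
        }
      }
    }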
diff --git ql/src/test/queries/clientpositive/vector_left_outer_join3.q ql/src/test/queries/clientpositive/vector_left_outer_join3.q
new file mode 100644
index 0000000..b50ddab
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_left_outer_join3.q
@@ -0,0 +1,32 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+
+drop table if exists TJOIN1;
+drop table if exists TJOIN2;
+create table if not exists TJOIN1 (RNUM int , C1 int, C2 int)
+ CLUSTERED BY (RNUM)
+ SORTED BY (RNUM) INTO 1 BUCKETS
+ STORED AS orc;
+create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2))
+ CLUSTERED BY (RNUM)
+ SORTED BY (RNUM) INTO 1 BUCKETS
+ STORED AS orc;
+create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ;
+create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ;
+LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE;
+LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE;
+INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE;
+INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE;
+
+set hive.vectorized.execution.enabled=false;
+explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
+
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
+
+set hive.vectorized.execution.enabled=true;
+explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
+
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 );
diff --git ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
new file mode 100644
index 0000000..c163dce
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
@@ -0,0 +1,234 @@
+PREHOOK: query: drop table if exists TJOIN1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists TJOIN1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists TJOIN2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists TJOIN2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TJOIN1
+POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TJOIN1
+PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TJOIN2
+POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TJOIN2
+PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TJOIN1STAGE
+POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TJOIN1STAGE
+PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TJOIN2STAGE
+POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TJOIN2STAGE
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tjoin1stage
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tjoin1stage
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE
TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 
372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL diff --git 
ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out new file mode 100644 index 0000000..b006ae8 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out @@ -0,0 +1,258 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT 
* from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: rnum (type: int), c2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col1 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +2 NULL 50 NULL +0 10 15 NULL +1 20 25 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: rnum (type: int), c2 (type: int) + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col1 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +2 NULL 50 NULL +0 10 15 NULL +1 20 25 NULL diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out new file mode 100644 index 0000000..a16f4ce --- /dev/null +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -0,0 +1,241 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 
SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + tjoin2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: 
default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + tjoin2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/vector_left_outer_join3.q.out ql/src/test/results/clientpositive/vector_left_outer_join3.q.out new file mode 100644 index 0000000..103982f --- /dev/null +++ ql/src/test/results/clientpositive/vector_left_outer_join3.q.out @@ -0,0 +1,241 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE 
+POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), 
] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: rnum (type: int), c2 (type: int) + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col1 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was 
here #### +2 NULL 50 NULL +0 10 15 NULL +1 20 25 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: rnum (type: int), c2 (type: int) + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Execution mode: vectorized + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col1 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +2 NULL 50 NULL +0 10 15 NULL +1 20 25 NULL