diff --git data/files/tjoin1.txt data/files/tjoin1.txt
new file mode 100644
index 0000000..897e0c5
--- /dev/null
+++ data/files/tjoin1.txt
@@ -0,0 +1,3 @@
+0|10|15
+1|20|25
+2|\N|50
\ No newline at end of file
diff --git data/files/tjoin2.txt data/files/tjoin2.txt
new file mode 100644
index 0000000..24820e9
--- /dev/null
+++ data/files/tjoin2.txt
@@ -0,0 +1,4 @@
+0|10|BB
+1|15|DD
+2|\N|EE
+3|10|FF
\ No newline at end of file
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 61d376a..6445b09 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -211,6 +211,8 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_groupby_3.q,\
   vector_groupby_reduce.q,\
   vector_left_outer_join.q,\
+  vector_left_outer_join2.q,\
+  vector_left_outer_join3.q,\
   vector_mapjoin_reduce.q,\
   vector_non_string_partition.q,\
   vector_orderby_5.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
index 858604c..2dee46d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
@@ -86,7 +86,8 @@ public void processOp(Object data, int tag) throws HiveException {
     }
     for (int i = 0; i < vrg.projectionSize; i++) {
       ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]];
-      singleRow[i] = valueWriters[i].writeValue(vectorColumn, batchIndex);
+      int adjustedIndex = (vectorColumn.isRepeating ? 0 : batchIndex);
+      singleRow[i] = valueWriters[i].writeValue(vectorColumn, adjustedIndex);
     }
     return singleRow;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
index 2c8aee1..dc68d64 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
@@ -40,7 +40,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /**
- * The vectorized version of the MapJoinOperator.
+ * The vectorized pass-through version of the MapJoinOperator.
  */
 public class VectorMapJoinOperator extends MapJoinOperator implements VectorizationContextRegion {
 
@@ -52,10 +52,8 @@
    */
   private static final long serialVersionUID = 1L;
 
-  private VectorExpression[] keyExpressions;
   private VectorExpression[] bigTableFilterExpressions;
-  private VectorExpression[] bigTableValueExpressions;
 
   private VectorizationContext vOutContext;
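The adjustedIndex line added to VectorFileSinkOperator above, and repeated in VectorReduceSinkOperator and the join operators below, is the core correctness fix: a ColumnVector with isRepeating set materializes its single shared value only in slot 0, so indexing by the row's batch position reads stale data. A minimal standalone sketch of those semantics, with a hypothetical class name and values, not part of the patch:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class RepeatingColumnDemo {
      public static void main(String[] args) {
        LongColumnVector col = new LongColumnVector();  // default batch size
        col.isRepeating = true;
        col.vector[0] = 42;       // the one value shared by every row
        int batchIndex = 7;       // any row position in the batch
        int adjustedIndex = (col.isRepeating ? 0 : batchIndex);
        System.out.println(col.vector[adjustedIndex]);  // prints 42
      }
    }

The same one-line guard is what the patch adds at each writeValue() call site.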
@@ -63,17 +61,11 @@
   // transient.
   //---------------------------------------------------------------------------
 
+  private transient VectorExpressionWriter[] rowWriters;
+  private transient VectorizedRowBatch outputBatch;
 
-  private transient VectorExpressionWriter[] valueWriters;
-  private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
-
-  // These members are used as out-of-band params
-  // for the inner-loop supper.processOp callbacks
-  //
-  private transient int batchIndex;
-  private transient VectorHashKeyWrapper[] keyValues;
-  private transient VectorHashKeyWrapperBatch keyWrapperBatch;
-  private transient VectorExpressionWriter[] keyOutputWriters;
+  private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
 
   private transient VectorizedRowBatchCtx vrbCtx = null;
 
@@ -89,22 +81,13 @@ public VectorMapJoinOperator (VectorizationContext vContext, OperatorDesc conf)
     MapJoinDesc desc = (MapJoinDesc) conf;
     this.conf = desc;
 
-    order = desc.getTagOrder();
-    numAliases = desc.getExprs().size();
-    posBigTable = (byte) desc.getPosBigTable();
-    filterMaps = desc.getFilterMap();
-    noOuterJoin = desc.isNoOuterJoin();
-
-    Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
-    bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable),
-        VectorExpressionDescriptor.Mode.FILTER);
-
-    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
-    keyExpressions = vContext.getVectorExpressions(keyDesc);
-
-    // We're only going to evaluate the big table vectorized expressions,
-    Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
-    bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
+    if (desc.isNoOuterJoin()) {
+      // It is only valid to pre-filter for INNER JOIN. OUTER JOIN requires post-ON condition
+      // evaluation of filters. They will be applied by our super class MapJoinOperator.
+      Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
+      bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get((byte) desc.getPosBigTable()),
+          VectorExpressionDescriptor.Mode.FILTER);
+    }
 
     // We are making a new output vectorized row batch.
     vOutContext = new VectorizationContext(desc.getOutputColumnNames());
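The guard above is the key behavioral change: a filter VectorExpression compacts the batch in place through the selected[] array, which silently deletes rows that an outer join must still emit null-extended (with this patch's test data, tjoin1's row "0|10|15" fails c2 > 15 yet must appear as "0 10 15 NULL"). A standalone sketch of that compaction, assuming a hypothetical class name and the generated filter expression class:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongScalar;

    public class PreFilterDemo {
      public static void main(String[] args) throws Exception {
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        LongColumnVector c2 = new LongColumnVector();
        c2.vector[0] = 15; c2.vector[1] = 25; c2.vector[2] = 50;  // tjoin1.c2
        batch.cols[0] = c2;
        batch.size = 3;

        new FilterLongColGreaterLongScalar(0, 15).evaluate(batch);  // c2 > 15
        System.out.println(batch.size);           // 2 -- row 0 is physically gone
        System.out.println(batch.selectedInUse);  // true; selected[] now holds {1, 2}
      }
    }

This is why the pre-filter may only run when isNoOuterJoin() is true; for OUTER JOIN the same predicates are left to the row-mode super class.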
@@ -113,69 +96,32 @@ public VectorMapJoinOperator (VectorizationContext vContext, OperatorDesc conf)
 
   @Override
   public void initializeOp(Configuration hconf) throws HiveException {
-    super.initializeOp(hconf);
-
-    List<ExprNodeDesc> keyDesc = conf.getKeys().get(posBigTable);
-    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
-
-    vrbCtx = new VectorizedRowBatchCtx();
-    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
-
-    outputBatch = vrbCtx.createVectorizedRowBatch();
-
-    keyWrapperBatch =VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
-
-    Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
-    List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
+    // Use a final variable to parameterize the processVectorInspector closure.
+    final int posBigTable = conf.getPosBigTable();
 
-    VectorExpressionWriterFactory.processVectorExpressions(
-        bigTableExpressions,
-        new VectorExpressionWriterFactory.ListOIDClosure() {
+    // We need an input object inspector that is for the row we will extract out of the
+    // big table vectorized row batch, not, for example, an original inspector for an
+    // ORC table, etc.
+    VectorExpressionWriterFactory.processVectorInspector(
+        (StructObjectInspector) inputObjInspectors[posBigTable],
+        new VectorExpressionWriterFactory.SingleOIDClosure() {
          @Override
-          public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
-            valueWriters = writers;
-            joinValuesObjectInspectors[posBigTable] = oids;
+          public void assign(VectorExpressionWriter[] writers,
+              ObjectInspector objectInspector) {
+            rowWriters = writers;
+            inputObjInspectors[posBigTable] = objectInspector;
           }
-        });
-
-    // We're hijacking the big table evaluators an replace them with our own custom ones
-    // which are going to return values from the input batch vector expressions
-    List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
-
-    for(int i=0; i<bigTableExpressions.size(); ++i) {
-      ExprNodeDesc desc = bigTableExpressions.get(i);
-      VectorExpression vectorExpr = bigTableValueExpressions[i];
-      ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc) {
-        int columnIndex;;
-        int writerIndex;
-
-        public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
-          this.columnIndex = columnIndex;
-          this.writerIndex = writerIndex;
-          return this;
-        }
+        }
+    );
 
-        @Override
-        public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
-          throw new HiveException("should never reach here");
-        }
+    // Call MapJoinOperator (our super class) with the new input inspector.
+    super.initializeOp(hconf);
 
-        @Override
-        protected Object _evaluate(Object row, int version) throws HiveException {
-          VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
-          int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
-          return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
-        }
-      }.initVectorExpr(vectorExpr.getOutputColumn(), i);
-      vectorNodeEvaluators.add(eval);
-    }
-    // Now replace the old evaluators with our own
-    joinValues[posBigTable] = vectorNodeEvaluators;
+    vrbCtx = new VectorizedRowBatchCtx();
+    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
 
-    // Filtering is handled in the input batch processing
-    filterMaps[posBigTable] = null;
+    outputBatch = vrbCtx.createVectorizedRowBatch();
 
     outputVectorAssigners = new HashMap<ObjectInspector, VectorColumnAssign[]>();
   }
 
@@ -208,51 +154,51 @@ private void flushOutput() throws HiveException {
 
   @Override
   public void closeOp(boolean aborted) throws HiveException {
+    // Make our super class finish before we flush our output batch.
+    super.closeOp(aborted);
+
     if (!aborted && 0 < outputBatch.size) {
       flushOutput();
     }
   }
 
   @Override
-  protected void setMapJoinKey(ReusableGetAdaptor dest, Object row, byte alias)
-      throws HiveException {
-    dest.setFromVector(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch);
-  }
-
-  @Override
   public void processOp(Object row, int tag) throws HiveException {
     byte alias = (byte) tag;
     VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
 
-    if (null != bigTableFilterExpressions) {
+    if (conf.isNoOuterJoin() && bigTableFilterExpressions != null) {
+      // It is only valid to pre-filter for INNER JOIN. OUTER JOIN requires post-ON condition
+      // evaluation of filters. They will be applied by our super class MapJoinOperator.
       for(VectorExpression ve:bigTableFilterExpressions) {
         ve.evaluate(inBatch);
       }
     }
 
-    if (null != bigTableValueExpressions) {
-      for(VectorExpression ve: bigTableValueExpressions) {
-        ve.evaluate(inBatch);
-      }
+    for (int i = 0; i < inBatch.size; i++) {
+      Object rowFromBatch = getRowObject(inBatch, i);
+      super.processOp(rowFromBatch, tag);
     }
+  }
 
-    keyWrapperBatch.evaluateBatch(inBatch);
-    keyValues = keyWrapperBatch.getVectorHashKeyWrappers();
-
-    // This implementation of vectorized JOIN is delegating all the work
-    // to the row-mode implementation by hijacking the big table node evaluators
-    // and calling the row-mode join processOp for each row in the input batch.
-    // Since the JOIN operator is not fully vectorized anyway atm (due to the use
-    // of row-mode small-tables) this is a reasonable trade-off.
-    //
-    for(batchIndex=0; batchIndex < inBatch.size; ++batchIndex) {
-      super.processOp(row, tag);
+  private Object[] getRowObject(VectorizedRowBatch vrg, int rowIndex)
+      throws HiveException {
+    int batchIndex = rowIndex;
+    if (vrg.selectedInUse) {
+      batchIndex = vrg.selected[rowIndex];
     }
-
-    // Set these two to invalid values so any attempt to use them
-    // outside the inner loop results in NPE/OutOfBounds errors
-    batchIndex = -1;
-    keyValues = null;
+    Object[] singleRow = new Object[vrg.projectionSize];
+    for (int i = 0; i < vrg.projectionSize; i++) {
+      ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]];
+      if (vectorColumn != null) {
+        int adjustedIndex = (vectorColumn.isRepeating ? 0 : batchIndex);
+        singleRow[i] = rowWriters[i].writeValue(vectorColumn, adjustedIndex);
+      } else {
+        // Some columns from tables are not used.
+        singleRow[i] = null;
+      }
+    }
+    return singleRow;
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java
index 0ae0186..390e2e7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java
@@ -87,7 +87,8 @@ public void processOp(Object data, int tag) throws HiveException {
     for (int i = 0; i < vrg.projectionSize; i++) {
       ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]];
       if (vectorColumn != null) {
-        singleRow[i] = rowWriters[i].writeValue(vectorColumn, batchIndex);
+        int adjustedIndex = (vectorColumn.isRepeating ? 0 : batchIndex);
+        singleRow[i] = rowWriters[i].writeValue(vectorColumn, adjustedIndex);
       } else {
         // Some columns from tables are not used.
         singleRow[i] = null;
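Both operators above, and the SMB operator below, materialize rows with the same two index hops: logical row to physical slot via selected[], then slot 0 for repeating columns. A compact standalone sketch of just that mapping (hypothetical class, not part of the patch):

    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public final class RowIndexMapping {
      // Physical slot backing the i-th live row of the batch.
      static int physicalIndex(VectorizedRowBatch batch, int logicalRow) {
        return batch.selectedInUse ? batch.selected[logicalRow] : logicalRow;
      }

      // Slot to read for one column of that row, honoring isRepeating.
      static int readIndex(ColumnVector col, int physicalIndex) {
        return col.isRepeating ? 0 : physicalIndex;
      }
    }

getRowObject() applies physicalIndex once per row and readIndex once per projected column.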
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
index ecd92d5..bb41c3d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
@@ -40,7 +40,7 @@
 
 /**
  * VectorSMBJoinOperator.
- * Implements the vectorized SMB join operator. The implementation relies on the row-mode SMB join operator.
+ * Implements the vectorized pass-through SMB join operator. The implementation relies on the row-mode SMB join operator.
  * It accepts a vectorized batch input from the big table and iterates over the batch, calling the parent row-mode
  * implementation for each row in the batch.
  */
@@ -51,39 +51,22 @@
 
   private static final long serialVersionUID = 1L;
 
-  private VectorExpression[] bigTableValueExpressions;
   private VectorExpression[] bigTableFilterExpressions;
-  private VectorExpression[] keyExpressions;
-
-  private VectorExpressionWriter[] keyOutputWriters;
-
   private VectorizationContext vOutContext;
 
   // The above members are initialized by the constructor and must not be
   // transient.
   //---------------------------------------------------------------------------
 
-  private transient VectorizedRowBatch outputBatch;
-
-  private transient VectorizedRowBatchCtx vrbCtx = null;
+  private transient VectorExpressionWriter[] rowWriters;
 
-  private transient VectorHashKeyWrapperBatch keyWrapperBatch;
+  private transient VectorizedRowBatch outputBatch;
 
   private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
 
-  private transient int batchIndex = -1;
-
-  private transient VectorHashKeyWrapper[] keyValues;
-
-  private transient SMBJoinKeyEvaluator keyEvaluator;
-
-  private transient VectorExpressionWriter[] valueWriters;
-
-  private interface SMBJoinKeyEvaluator {
-    List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException;
-}
+  private transient VectorizedRowBatchCtx vrbCtx = null;
 
   public VectorSMBMapJoinOperator() {
     super();
@@ -95,172 +78,81 @@ public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf
     SMBJoinDesc desc = (SMBJoinDesc) conf;
     this.conf = desc;
 
-    order = desc.getTagOrder();
-    numAliases = desc.getExprs().size();
-    posBigTable = (byte) desc.getPosBigTable();
-    filterMaps = desc.getFilterMap();
-    noOuterJoin = desc.isNoOuterJoin();
-
-    // Must obtain vectorized equivalents for filter and value expressions
-
-    Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
-    bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable),
-        VectorExpressionDescriptor.Mode.FILTER);
-    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
-    keyExpressions = vContext.getVectorExpressions(keyDesc);
-    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
-
-    Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
-    bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
+    if (desc.isNoOuterJoin()) {
+      // It is only valid to pre-filter for INNER JOIN. OUTER JOIN requires post-ON condition
+      // evaluation of filters. They will be applied by our super class SMBMapJoinOperator.
+      Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
+      bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get((byte) desc.getPosBigTable()),
+          VectorExpressionDescriptor.Mode.FILTER);
+    }
 
     // We are making a new output vectorized row batch.
     vOutContext = new VectorizationContext(desc.getOutputColumnNames());
     vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
   }
 
-  @Override
-  protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException {
-    if (alias == this.posBigTable) {
-      VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
-      return keyEvaluator.evaluate(keyValues[batchIndex]);
-    } else {
-      return super.smbJoinComputeKeys(row, alias);
-    }
-  }
-
   @Override
   protected void initializeOp(Configuration hconf) throws HiveException {
-    super.initializeOp(hconf);
-
-    vrbCtx = new VectorizedRowBatchCtx();
-    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
-
-    outputBatch = vrbCtx.createVectorizedRowBatch();
-
-    keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
-
-    outputVectorAssigners = new HashMap<ObjectInspector, VectorColumnAssign[]>();
-
-    // This key evaluator translates from the vectorized VectorHashKeyWrapper format
-    // into the row-mode MapJoinKey
-    keyEvaluator = new SMBJoinKeyEvaluator() {
-      private List<Object> key;
-
-      public SMBJoinKeyEvaluator init() {
-        key = new ArrayList<Object>();
-        for(int i = 0; i < keyExpressions.length; ++i) {
-          key.add(null);
-        }
-        return this;
-      }
-
-      @Override
-      public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException {
-        for(int i = 0; i < keyExpressions.length; ++i) {
-          key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
-        }
-        return key;
-      };
-    }.init();
-
-    Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
-    List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
-
-    // We're hijacking the big table evaluators and replacing them with our own custom ones
-    // which are going to return values from the input batch vector expressions
-    List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
-
-    VectorExpressionWriterFactory.processVectorExpressions(
-        bigTableExpressions,
-        new VectorExpressionWriterFactory.ListOIDClosure() {
-
+    // Use a final variable to parameterize the processVectorInspector closure.
+    final int posBigTable = conf.getPosBigTable();
+
+    // We need an input object inspector that is for the row we will extract out of the
+    // big table vectorized row batch, not, for example, an original inspector for an
+    // ORC table, etc.
+    VectorExpressionWriterFactory.processVectorInspector(
+        (StructObjectInspector) inputObjInspectors[posBigTable],
+        new VectorExpressionWriterFactory.SingleOIDClosure() {
          @Override
-          public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
-            valueWriters = writers;
-            joinValuesObjectInspectors[posBigTable] = oids;
+          public void assign(VectorExpressionWriter[] writers,
+              ObjectInspector objectInspector) {
+            rowWriters = writers;
+            inputObjInspectors[posBigTable] = objectInspector;
          }
-        });
-
-    for(int i=0; i<bigTableExpressions.size(); ++i) {
-      ExprNodeDesc desc = bigTableExpressions.get(i);
-      VectorExpression vectorExpr = bigTableValueExpressions[i];
-      ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc) {
-        int columnIndex;;
-        int writerIndex;
+        }
+    );
 
+    // Call SMBMapJoinOperator (our super class) with the new input inspector.
+    super.initializeOp(hconf);
 
-        public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
-          this.columnIndex = columnIndex;
-          this.writerIndex = writerIndex;
-          return this;
-        }
+    vrbCtx = new VectorizedRowBatchCtx();
+    vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector);
 
-        @Override
-        public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
-          throw new HiveException("should never reach here");
-        }
+    outputBatch = vrbCtx.createVectorizedRowBatch();
 
-        @Override
-        protected Object _evaluate(Object row, int version) throws HiveException {
-          VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
-          int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
-          return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
-        }
-      }.initVectorExpr(vectorExpr.getOutputColumn(), i);
-      vectorNodeEvaluators.add(eval);
-    }
-    // Now replace the old evaluators with our own
-    joinValues[posBigTable] = vectorNodeEvaluators;
-
+    outputVectorAssigners = new HashMap<ObjectInspector, VectorColumnAssign[]>();
   }
 
   @Override
   public void processOp(Object row, int tag) throws HiveException {
     byte alias = (byte) tag;
 
     if (alias != this.posBigTable) {
       super.processOp(row, tag);
     } else {
       VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
 
-      if (null != bigTableFilterExpressions) {
+      if (conf.isNoOuterJoin() && bigTableFilterExpressions != null) {
+        // It is only valid to pre-filter for INNER JOIN. OUTER JOIN requires post-ON condition
+        // evaluation of filters. They will be applied by our super class SMBMapJoinOperator.
         for(VectorExpression ve : bigTableFilterExpressions) {
          ve.evaluate(inBatch);
        }
      }
 
-      if (null != bigTableValueExpressions) {
-        for(VectorExpression ve : bigTableValueExpressions) {
-          ve.evaluate(inBatch);
-        }
-      }
-
-      keyWrapperBatch.evaluateBatch(inBatch);
-      keyValues = keyWrapperBatch.getVectorHashKeyWrappers();
-
-      // This implementation of vectorized JOIN is delegating all the work
-      // to the row-mode implementation by hijacking the big table node evaluators
-      // and calling the row-mode join processOp for each row in the input batch.
-      // Since the JOIN operator is not fully vectorized anyway at the moment
-      // (due to the use of row-mode small-tables) this is a reasonable trade-off.
-      //
-      for(batchIndex=0; batchIndex < inBatch.size; ++batchIndex ) {
-        super.processOp(row, tag);
+
+      for (int i = 0; i < inBatch.size; i++) {
+        Object rowFromBatch = getRowObject(inBatch, i);
+        super.processOp(rowFromBatch, tag);
       }
-
-      // Set these two to invalid values so any attempt to use them
-      // outside the inner loop results in NPE/OutOfBounds errors
-      batchIndex = -1;
-      keyValues = null;
     }
   }
 
   @Override
   public void closeOp(boolean aborted) throws HiveException {
+    // Make our super class finish before we flush our output batch.
     super.closeOp(aborted);
+
     if (!aborted && 0 < outputBatch.size) {
       flushOutput();
     }
@@ -289,6 +181,26 @@ private void flushOutput() throws HiveException {
     outputBatch.reset();
   }
 
+  private Object[] getRowObject(VectorizedRowBatch vrg, int rowIndex)
+      throws HiveException {
+    int batchIndex = rowIndex;
+    if (vrg.selectedInUse) {
+      batchIndex = vrg.selected[rowIndex];
+    }
+    Object[] singleRow = new Object[vrg.projectionSize];
+    for (int i = 0; i < vrg.projectionSize; i++) {
+      ColumnVector vectorColumn = vrg.cols[vrg.projectedColumns[i]];
+      if (vectorColumn != null) {
+        int adjustedIndex = (vectorColumn.isRepeating ?
0 : batchIndex); + singleRow[i] = rowWriters[i].writeValue(vectorColumn, adjustedIndex); + } else { + // Some columns from tables are not used. + singleRow[i] = null; + } + } + return singleRow; + } + @Override public VectorizationContext getOuputVectorizationContext() { return vOutContext; diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q new file mode 100644 index 0000000..db820e2 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q @@ -0,0 +1,26 @@ +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +drop table if exists TJOIN1; +drop table if exists TJOIN2; +create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc; +create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc; +create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE; +LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE; +INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE; +INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; + +set hive.vectorized.execution.enabled=false; +explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +set hive.vectorized.execution.enabled=true; +explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); diff --git ql/src/test/queries/clientpositive/vector_left_outer_join3.q ql/src/test/queries/clientpositive/vector_left_outer_join3.q new file mode 100644 index 0000000..b50ddab --- /dev/null +++ ql/src/test/queries/clientpositive/vector_left_outer_join3.q @@ -0,0 +1,32 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; + +drop table if exists TJOIN1; +drop table if exists TJOIN2; +create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc; +create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc; +create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE; +LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE; +INSERT INTO TABLE TJOIN1 
SELECT * from TJOIN1STAGE; +INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; + +set hive.vectorized.execution.enabled=false; +explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +set hive.vectorized.execution.enabled=true; +explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); diff --git ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out new file mode 100644 index 0000000..c163dce --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out @@ -0,0 +1,234 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE 
TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out new file mode 100644 index 0000000..b006ae8 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out @@ -0,0 +1,258 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' 
OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: rnum (type: int), c2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col1 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +2 NULL 50 NULL +0 10 15 NULL +1 20 25 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: rnum (type: int), c2 (type: int) + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col1 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: 
Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +2 NULL 50 NULL +0 10 15 NULL +1 20 25 NULL diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out new file mode 100644 index 0000000..a16f4ce --- /dev/null +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -0,0 +1,241 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE 
TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + tjoin2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from 
tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + tjoin2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/vector_left_outer_join3.q.out ql/src/test/results/clientpositive/vector_left_outer_join3.q.out new file mode 100644 index 0000000..103982f --- /dev/null +++ ql/src/test/results/clientpositive/vector_left_outer_join3.q.out @@ -0,0 +1,241 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE 
+POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) + CLUSTERED BY (RNUM) + SORTED BY (RNUM) INTO 1 BUCKETS + STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, 
type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: rnum (type: int), c2 (type: int) + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col1 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = 
tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +2 NULL 50 NULL +0 10 15 NULL +1 20 25 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: rnum (type: int), c2 (type: int) + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + Execution mode: vectorized + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col1 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +2 NULL 50 NULL +0 10 15 NULL +1 20 25 NULL
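
The all-NULL c2j2 column in the expected results above follows directly from the outer-join semantics the golden files encode: a predicate in the ON clause of a LEFT OUTER JOIN only gates which right-side rows may match, it never removes rows from the preserved left side — which is why the plans carry it as a join-operator "filter predicates" entry rather than a scan filter. A minimal HiveQL sketch against the tjoin1/tjoin2 data loaded above (the WHERE variant and its stated result are an illustration for contrast, not a generated golden file):

    -- tjoin1 rows: (0, 10, 15), (1, 20, 25), (2, NULL, 50)
    -- tjoin2 c1 values: 10, 15, NULL, 10

    -- ON-clause filter: every left row survives; c2j2 is NULL-padded.
    --   row 0: c1 = 10 has matches, but c2 = 15 fails c2 > 15 -> NULL
    --   row 1: c1 = 20 has no match in tjoin2                 -> NULL
    --   row 2: c1 is NULL and never equi-joins                -> NULL
    select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2
    from tjoin1 left outer join tjoin2
      on (tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15);

    -- Moving the same predicate to WHERE filters the left side instead,
    -- dropping row 0 entirely (expected: rows 1 and 2, c2j2 still NULL):
    select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2
    from tjoin1 left outer join tjoin2
      on (tjoin1.c1 = tjoin2.c1)
    where tjoin1.c2 > 15;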