diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index b7abf0d..3375aa3 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -220,6 +220,9 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_if_expr.q,\ vector_interval_1.q,\ vector_interval_2.q,\ + vector_join30.q,\ + vector_join_filters.q,\ + vector_join_nulls.q,\ vector_left_outer_join.q,\ vector_mapjoin_reduce.q,\ vector_mr_diff_schema_alias.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index dcea8ae..4a16b4c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -645,8 +645,7 @@ public static String displayBytes(byte[] bytes, int start, int length) { public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) { StringBuffer sb = new StringBuffer(); sb.append(prefix + " row " + index + " "); - for (int i = 0; i < batch.projectionSize; i++) { - int column = batch.projectedColumns[i]; + for (int column = 0; column < batch.cols.length; column++) { ColumnVector colVector = batch.cols[column]; if (colVector == null) { sb.append("(null colVector " + column + ")"); @@ -666,7 +665,7 @@ public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, Strin if (bytes == null) { sb.append("(Unexpected null bytes with start " + start + " length " + length + ")"); } else { - sb.append(displayBytes(bytes, start, length)); + sb.append("bytes: '" + displayBytes(bytes, start, length) + "'"); } } else if (colVector instanceof DecimalColumnVector) { sb.append(((DecimalColumnVector) colVector).vector[index].toString()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index f272b6d..8edde2e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -460,8 +460,8 @@ protected void determineCommonInfo(boolean isOuter) { LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); - LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); - LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames)); + LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); + LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames)); } setupVOutContext(conf.getOutputColumnNames()); @@ -493,7 +493,7 @@ protected void determineCommonInfo(boolean isOuter) { */ protected void setupVOutContext(List outputColumnNames) { if (LOG.isDebugEnabled()) { - 
LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); + LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); } if (outputColumnNames.size() != outputProjection.length) { throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeNames) + " length mismatch"); @@ -719,7 +719,7 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, * Common one time setup by native vectorized map join operator's processOp. */ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - LOG.info("VectorMapJoinInnerCommonOperator commonSetup begin..."); + LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin..."); if (LOG.isDebugEnabled()) { displayBatchColumns(batch, "batch"); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 743a975..2969d2f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -371,10 +371,8 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, * The big table batch. * @param hashMapResult * The hash map results for the repeated key. - * @return - * The new count of selected rows. */ - protected int generateHashMapResultRepeatedAll(VectorizedRowBatch batch, + protected void generateHashMapResultRepeatedAll(VectorizedRowBatch batch, VectorMapJoinHashMapResult hashMapResult) throws IOException, HiveException { int[] selected = batch.selected; @@ -398,7 +396,7 @@ protected int generateHashMapResultRepeatedAll(VectorizedRowBatch batch, batch.selected, 0, batch.size); } - return numSel; + batch.size = numSel; } //----------------------------------------------------------------------------------------------- @@ -460,7 +458,7 @@ private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, // int length = output.getLength() - offset; rowBytesContainer.finishRow(); -// LOG.info("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); +// LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); } protected void spillHashMapBatch(VectorizedRowBatch batch, @@ -511,14 +509,14 @@ protected void reloadHashTable(HashPartition partition, currentSmallTable); needHashTableSetup = true; - LOG.info(CLASS_NAME + " reloadHashTable!"); + LOG.debug(CLASS_NAME + " reloadHashTable!"); } @Override protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition) throws HiveException, IOException { - LOG.info(CLASS_NAME + " reProcessBigTable enter..."); + LOG.debug(CLASS_NAME + " reProcessBigTable enter..."); int rowCount = 0; int batchCount = 0; @@ -534,14 +532,14 @@ protected void reProcessBigTable(HybridHashTableContainer.HashPartition partitio int offset = bigTable.currentOffset(); int length = bigTable.currentLength(); -// LOG.info(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length); +// LOG.debug(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length); 
bigTableVectorDeserializeRow.setBytes(bytes, offset, length); bigTableVectorDeserializeRow.deserializeByValue(spillReplayBatch, spillReplayBatch.size); spillReplayBatch.size++; if (spillReplayBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { - LOG.info("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows"); + LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows"); process(spillReplayBatch, posBigTable); // call process once we have a full batch spillReplayBatch.reset(); batchCount++; @@ -549,18 +547,18 @@ protected void reProcessBigTable(HybridHashTableContainer.HashPartition partitio } // Process the row batch that has less than DEFAULT_SIZE rows if (spillReplayBatch.size > 0) { - LOG.info("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows"); + LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows"); process(spillReplayBatch, posBigTable); spillReplayBatch.reset(); batchCount++; } bigTable.clear(); } catch (Exception e) { - LOG.info(CLASS_NAME + " reProcessBigTable exception! " + e); + LOG.debug(CLASS_NAME + " reProcessBigTable exception! " + e); throw new HiveException(e); } - LOG.info(CLASS_NAME + " reProcessBigTable exit! " + rowCount + " row processed and " + batchCount + " batches processed"); + LOG.debug(CLASS_NAME + " reProcessBigTable exit! " + rowCount + " row processed and " + batchCount + " batches processed"); } @@ -622,7 +620,7 @@ public void closeOp(boolean aborted) throws HiveException { if (!aborted && overflowBatch.size > 0) { forwardOverflow(); } - LOG.info("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed"); + LOG.debug("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed"); } //----------------------------------------------------------------------------------------------- @@ -631,6 +629,23 @@ public void closeOp(boolean aborted) throws HiveException { * Debug. */ + public boolean verifyMonotonicallyIncreasing(int[] selected, int size) { + + if (size == 0) { + return true; + } + int prevBatchIndex = selected[0]; + + for (int i = 1; i < size; i++) { + int batchIndex = selected[i]; + if (batchIndex <= prevBatchIndex) { + return false; + } + prevBatchIndex = batchIndex; + } + return true; + } + public static String intArrayToRangesString(int selection[], int size) { if (size == 0) { return "[]"; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java index 3132531..f18b982 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java @@ -129,22 +129,10 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { * @param batch * The big table batch with any matching and any non matching rows both as * selected in use. - * @param allMatchs - * A subset of the rows of the batch that are matches. * @param allMatchCount * Number of matches in allMatchs. - * @param equalKeySeriesValueCounts - * For each equal key series, whether the number of (empty) small table values. 
- * @param equalKeySeriesAllMatchIndices - * For each equal key series, the logical index into allMatchs. - * @param equalKeySeriesDuplicateCounts - * For each equal key series, the number of duplicates or equal keys. * @param equalKeySeriesCount * Number of single value matches. - * @param spills - * A subset of the rows of the batch that are spills. - * @param spillHashMapResultIndices - * For each entry in spills, the index into the hashMapResult. * @param spillCount * Number of spills in spills. * @param hashTableResults @@ -154,15 +142,16 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { * Number of entries in hashMapResults. * **/ - protected int finishInnerBigOnly(VectorizedRowBatch batch, - int[] allMatchs, int allMatchCount, - long[] equalKeySeriesValueCounts, int[] equalKeySeriesAllMatchIndices, - int[] equalKeySeriesDuplicateCounts, int equalKeySeriesCount, - int[] spills, int[] spillHashMapResultIndices, int spillCount, + protected void finishInnerBigOnly(VectorizedRowBatch batch, + int allMatchCount, int equalKeySeriesCount, int spillCount, VectorMapJoinHashTableResult[] hashTableResults, int hashMapResultCount) throws HiveException, IOException { - int numSel = 0; + // Get rid of spills before we start modifying the batch. + if (spillCount > 0) { + spillHashMapBatch(batch, hashTableResults, + spills, spillHashMapResultIndices, spillCount); + } /* * Optimize by running value expressions only over the matched rows. @@ -171,6 +160,7 @@ protected int finishInnerBigOnly(VectorizedRowBatch batch, performValueExpressions(batch, allMatchs, allMatchCount); } + int numSel = 0; for (int i = 0; i < equalKeySeriesCount; i++) { long count = equalKeySeriesValueCounts[i]; int allMatchesIndex = equalKeySeriesAllMatchIndices[i]; @@ -185,13 +175,8 @@ protected int finishInnerBigOnly(VectorizedRowBatch batch, duplicateCount, count); } } - - if (spillCount > 0) { - spillHashMapBatch(batch, hashTableResults, - spills, spillHashMapResultIndices, spillCount); - } - - return numSel; + batch.size = numSel; + batch.selectedInUse = true; } /** @@ -215,11 +200,11 @@ private int generateHashMultiSetResultSingleValue(VectorizedRowBatch batch, int[] allMatchs, int allMatchesIndex, int duplicateCount, int numSel) throws HiveException, IOException { - // LOG.info("generateHashMultiSetResultSingleValue enter..."); + // LOG.debug("generateHashMultiSetResultSingleValue enter..."); // Generate result within big table batch itself. - // LOG.info("generateHashMultiSetResultSingleValue with big table..."); + // LOG.debug("generateHashMultiSetResultSingleValue with big table..."); for (int i = 0; i < duplicateCount; i++) { @@ -250,7 +235,7 @@ private void generateHashMultiSetResultMultiValue(VectorizedRowBatch batch, int[] allMatchs, int allMatchesIndex, int duplicateCount, long count) throws HiveException, IOException { - // LOG.info("generateHashMultiSetResultMultiValue allMatchesIndex " + allMatchesIndex + " duplicateCount " + duplicateCount + " count " + count); + // LOG.debug("generateHashMultiSetResultMultiValue allMatchesIndex " + allMatchesIndex + " duplicateCount " + duplicateCount + " count " + count); // TODO: Look at repeating optimizations... 
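The reshaping above recurs in every finish* method in this patch: spill rows are handed off before the batch is mutated, and the method now records batch.size and batch.selectedInUse itself instead of returning numSel for the caller to apply. A minimal, self-contained Java sketch of that pattern -- the Batch type and the spill helper below are invented for illustration and are not Hive APIs:

class FinishPatternSketch {
  // Stand-in for VectorizedRowBatch: a size, a selected index array, and a flag.
  static class Batch {
    int size;
    int[] selected = new int[1024];
    boolean selectedInUse;
  }

  // Hypothetical stand-in for spillHashMapBatch(...): spilled rows are handed off
  // first, while batch.selected still reflects the hash-table lookup results.
  static void spill(Batch batch, int[] spills, int spillCount) {
    // ... write the spilled rows to the hybrid hash-table spill files ...
  }

  static void finish(Batch batch, int[] allMatchs, int allMatchCount,
      int[] spills, int spillCount) {
    // 1. Get rid of spills before we start modifying the batch.
    if (spillCount > 0) {
      spill(batch, spills, spillCount);
    }
    // 2. Rebuild the selected array in place with the rows to forward.
    int numSel = 0;
    for (int i = 0; i < allMatchCount; i++) {
      batch.selected[numSel++] = allMatchs[i];
    }
    // 3. Record the outcome on the batch itself; the caller simply forwards
    //    the batch when batch.size > 0.
    batch.size = numSel;
    batch.selectedInUse = true;
  }
}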
@@ -309,11 +294,9 @@ protected int generateHashMultiSetResultRepeatedAll(VectorizedRowBatch batch, return 0; } - protected int finishInnerBigOnlyRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult, + protected void finishInnerBigOnlyRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult, VectorMapJoinHashMultiSetResult hashMultiSetResult) throws HiveException, IOException { - int numSel = 0; - switch (joinResult) { case MATCH: @@ -325,19 +308,21 @@ protected int finishInnerBigOnlyRepeated(VectorizedRowBatch batch, JoinUtil.Join } // Generate special repeated case. - numSel = generateHashMultiSetResultRepeatedAll(batch, hashMultiSetResult); + int numSel = generateHashMultiSetResultRepeatedAll(batch, hashMultiSetResult); + batch.size = numSel; + batch.selectedInUse = true; break; case SPILL: // Whole batch is spilled. spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMultiSetResult); + batch.size = 0; break; case NOMATCH: // No match for entire batch. + batch.size = 0; break; } - - return numSel; } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index 2173829..a760a90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -146,9 +146,6 @@ public void process(Object row, int tag) throws HiveException { } } - // We rebuild in-place the selected array with rows destine to be forwarded. - int numSel = 0; - /* * Single-Column Long specific declarations. */ @@ -193,7 +190,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]); + finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]); } else { /* @@ -353,17 +350,11 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount))); } - numSel = finishInnerBigOnly(batch, - allMatchs, allMatchCount, - equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices, - equalKeySeriesDuplicateCounts, equalKeySeriesCount, - spills, spillHashMapResultIndices, spillCount, + finishInnerBigOnly(batch, + allMatchCount, equalKeySeriesCount, spillCount, (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java index ab6c17e..341b593 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java @@ -151,9 +151,6 @@ public void process(Object row, int tag) throws HiveException { } } - // We rebuild in-place the selected array with rows destine to be forwarded. 
- int numSel = 0; - /* * Multi-Key specific declarations. */ @@ -205,7 +202,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]); + finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]); } else { /* @@ -366,17 +363,11 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount))); } - numSel = finishInnerBigOnly(batch, - allMatchs, allMatchCount, - equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices, - equalKeySeriesDuplicateCounts, equalKeySeriesCount, - spills, spillHashMapResultIndices, spillCount, + finishInnerBigOnly(batch, + allMatchCount, equalKeySeriesCount, spillCount, (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index 0b725aa..f9e82b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -182,7 +182,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]); + finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]); } else { /* @@ -342,17 +342,11 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount))); } - numSel = finishInnerBigOnly(batch, - allMatchs, allMatchCount, - equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices, - equalKeySeriesDuplicateCounts, equalKeySeriesCount, - spills, spillHashMapResultIndices, spillCount, + finishInnerBigOnly(batch, + allMatchCount, equalKeySeriesCount, spillCount, (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java index a7eb454..e954e9d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java @@ -132,38 +132,17 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { * @param batch * The big table batch with any matching and any non matching rows both as * selected in use. - * @param allMatchs - * A subset of the rows of the batch that are matches. * @param allMatchCount * Number of matches in allMatchs. 
- * @param equalKeySeriesHashMapResultIndices - * For each equal key series, the index into the hashMapResult. - * @param equalKeySeriesAllMatchIndices - * For each equal key series, the logical index into allMatchs. - * @param equalKeySeriesIsSingleValue - * For each equal key series, whether there is 1 or multiple small table values. - * @param equalKeySeriesDuplicateCounts - * For each equal key series, the number of duplicates or equal keys. * @param equalKeySeriesCount * Number of single value matches. - * @param spills - * A subset of the rows of the batch that are spills. - * @param spillHashMapResultIndices - * For each entry in spills, the index into the hashMapResult. * @param spillCount * Number of spills in spills. - * @param hashMapResults - * The array of all hash map results for the batch. * @param hashMapResultCount * Number of entries in hashMapResults. */ - protected int finishInner(VectorizedRowBatch batch, - int[] allMatchs, int allMatchCount, - int[] equalKeySeriesHashMapResultIndices, int[] equalKeySeriesAllMatchIndices, - boolean[] equalKeySeriesIsSingleValue, int[] equalKeySeriesDuplicateCounts, - int equalKeySeriesCount, - int[] spills, int[] spillHashMapResultIndices, int spillCount, - VectorMapJoinHashMapResult[] hashMapResults, int hashMapResultCount) + protected void finishInner(VectorizedRowBatch batch, + int allMatchCount, int equalKeySeriesCount, int spillCount, int hashMapResultCount) throws HiveException, IOException { int numSel = 0; @@ -196,10 +175,11 @@ protected int finishInner(VectorizedRowBatch batch, spills, spillHashMapResultIndices, spillCount); } - return numSel; + batch.size = numSel; + batch.selectedInUse = true; } - protected int finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult, + protected void finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult, VectorMapJoinHashTableResult hashMapResult) throws HiveException, IOException { int numSel = 0; @@ -215,22 +195,19 @@ protected int finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult } // Generate special repeated case. - numSel = generateHashMapResultRepeatedAll(batch, hashMapResults[0]); + generateHashMapResultRepeatedAll(batch, hashMapResults[0]); break; case SPILL: // Whole batch is spilled. spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResults[0]); + batch.size = 0; break; case NOMATCH: // No match for entire batch. + batch.size = 0; break; } - /* - * Common repeated join result processing. - */ - - return numSel; } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index c998252..7273111 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -145,9 +145,6 @@ public void process(Object row, int tag) throws HiveException { } } - // We rebuild in-place the selected array with rows destine to be forwarded. - int numSel = 0; - /* * Single-Column Long specific declarations. 
*/ @@ -192,7 +189,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]); + finishInnerRepeated(batch, joinResult, hashMapResults[0]); } else { /* @@ -352,18 +349,10 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount))); } - numSel = finishInner(batch, - allMatchs, allMatchCount, - equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices, - equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts, - equalKeySeriesCount, - spills, spillHashMapResultIndices, spillCount, - hashMapResults, hashMapResultCount); + finishInner(batch, + allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index e426476..2706f4b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -149,9 +149,6 @@ public void process(Object row, int tag) throws HiveException { } } - // We rebuild in-place the selected array with rows destine to be forwarded. - int numSel = 0; - /* * Multi-Key specific declarations. */ @@ -203,7 +200,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]); + finishInnerRepeated(batch, joinResult, hashMapResults[0]); } else { /* @@ -275,7 +272,7 @@ public void process(Object row, int tag) throws HiveException { haveSaveKey = true; /* - * Multi-Key specific save key and lookup. + * Multi-Key specific save key. */ temp = saveKeyOutput; @@ -364,18 +361,10 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount))); } - numSel = finishInner(batch, - allMatchs, allMatchCount, - equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices, - equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts, - equalKeySeriesCount, - spills, spillHashMapResultIndices, spillCount, - hashMapResults, hashMapResultCount); + finishInner(batch, + allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. 
forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index 3bc225a..2fb036d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -136,9 +136,6 @@ public void process(Object row, int tag) throws HiveException { } } - // We rebuild in-place the selected array with rows destine to be forwarded. - int numSel = 0; - /* * Single-Column String specific declarations. */ @@ -181,7 +178,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]); + finishInnerRepeated(batch, joinResult, hashMapResults[0]); } else { /* @@ -341,18 +338,10 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount))); } - numSel = finishInner(batch, - allMatchs, allMatchCount, - equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices, - equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts, - equalKeySeriesCount, - spills, spillHashMapResultIndices, spillCount, - hashMapResults, hashMapResultCount); + finishInner(batch, + allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java index 230f9fe..07393b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java @@ -111,26 +111,23 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { * @param batch * The big table batch with any matching and any non matching rows both as * selected in use. - * @param allMatchs - * A subset of the rows of the batch that are matches. * @param allMatchCount * Number of matches in allMatchs. - * @param spills - * A subset of the rows of the batch that are spills. - * @param spillHashMapResultIndices - * For each entry in spills, the index into the hashTableResults. * @param spillCount * Number of spills in spills. * @param hashTableResults * The array of all hash table results for the batch. We need the * VectorMapJoinHashTableResult for the spill information. */ - protected int finishLeftSemi(VectorizedRowBatch batch, - int[] allMatchs, int allMatchCount, - int[] spills, int[] spillHashMapResultIndices, int spillCount, + protected void finishLeftSemi(VectorizedRowBatch batch, + int allMatchCount, int spillCount, VectorMapJoinHashTableResult[] hashTableResults) throws HiveException, IOException { - int numSel; + // Get rid of spills before we start modifying the batch. 
+ if (spillCount > 0) { + spillHashMapBatch(batch, hashTableResults, + spills, spillHashMapResultIndices, spillCount); + } /* * Optimize by running value expressions only over the matched rows. @@ -139,14 +136,9 @@ protected int finishLeftSemi(VectorizedRowBatch batch, performValueExpressions(batch, allMatchs, allMatchCount); } - numSel = generateHashSetResults(batch, allMatchs, allMatchCount); - - if (spillCount > 0) { - spillHashMapBatch(batch, hashTableResults, - spills, spillHashMapResultIndices, spillCount); - } - - return numSel; + int numSel = generateHashSetResults(batch, allMatchs, allMatchCount); + batch.size = numSel; + batch.selectedInUse = true; } /** @@ -199,11 +191,9 @@ protected int generateHashSetResultRepeatedAll(VectorizedRowBatch batch) throws return batch.size; } - protected int finishLeftSemiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult, + protected void finishLeftSemiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult, VectorMapJoinHashTableResult hashSetResult) throws HiveException, IOException { - int numSel = 0; - switch (joinResult) { case MATCH: @@ -215,19 +205,21 @@ protected int finishLeftSemiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResu } // Generate special repeated case. - numSel = generateHashSetResultRepeatedAll(batch); + int numSel = generateHashSetResultRepeatedAll(batch); + batch.size = numSel; + batch.selectedInUse = true; break; case SPILL: // Whole batch is spilled. spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashSetResult); + batch.size = 0; break; case NOMATCH: // No match for entire batch. + batch.size = 0; break; } - - return numSel; } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index dd614da..2c1451f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -146,9 +146,6 @@ public void process(Object row, int tag) throws HiveException { } } - // We rebuild in-place the selected array with rows destine to be forwarded. - int numSel = 0; - /* * Single-Column Long specific declarations. */ @@ -193,7 +190,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]); + finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]); } else { /* @@ -343,15 +340,11 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount))); } - numSel = finishLeftSemi(batch, - allMatchs, allMatchCount, - spills, spillHashMapResultIndices, spillCount, + finishLeftSemi(batch, + allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. 
forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index cf4f312..532198c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -150,9 +150,6 @@ public void process(Object row, int tag) throws HiveException { } } - // We rebuild in-place the selected array with rows destine to be forwarded. - int numSel = 0; - /* * Multi-Key specific declarations. */ @@ -205,7 +202,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]); + finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]); } else { /* @@ -286,6 +283,10 @@ public void process(Object row, int tag) throws HiveException { saveKeyOutput = currentKeyOutput; currentKeyOutput = temp; + /* + * Multi-key specific lookup key. + */ + byte[] keyBytes = saveKeyOutput.getData(); int keyLength = saveKeyOutput.getLength(); saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]); @@ -355,15 +356,11 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount))); } - numSel = finishLeftSemi(batch, - allMatchs, allMatchCount, - spills, spillHashMapResultIndices, spillCount, + finishLeftSemi(batch, + allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 12d663c..db4f023 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -137,9 +137,6 @@ public void process(Object row, int tag) throws HiveException { } } - // We rebuild in-place the selected array with rows destine to be forwarded. - int numSel = 0; - /* * Single-Column String specific declarations. */ @@ -182,7 +179,7 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); } - numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]); + finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]); } else { /* @@ -258,6 +255,10 @@ public void process(Object row, int tag) throws HiveException { saveKeyBatchIndex = batchIndex; + /* + * Single-Column String specific lookup key. 
+ */ + + byte[] keyBytes = vector[batchIndex]; int keyStart = start[batchIndex]; int keyLength = length[batchIndex]; @@ -328,15 +329,11 @@ public void process(Object row, int tag) throws HiveException { " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount))); } - numSel = finishLeftSemi(batch, - allMatchs, allMatchCount, - spills, spillHashMapResultIndices, spillCount, + finishLeftSemi(batch, + allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 3309921..cbbc7fa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -70,15 +70,34 @@ // generation. protected transient VectorMapJoinHashMapResult hashMapResults[]; - // Pre-allocated member for storing any matching row indexes during a processOp call. - protected transient int[] matchs; + // Pre-allocated member for remembering the big table's selected array at the beginning of + // the process method before applying any filter. For outer join we need to remember which + // rows did not match since they will appear in the outer join result with NULLs for the + // small table. protected transient int[] inputSelected; - // Pre-allocated member for storing the mapping to the row batchIndex of the first of a series of - // equal keys that was looked up during a processOp call. - protected transient int[] matchHashMapResultIndices; + // Pre-allocated member for storing the (physical) batch indexes of matching rows (single- or + // multi-small-table-valued) during a process call. + protected transient int[] allMatchs; - // All matching and non-matching big table rows. - protected transient int[] nonSpills; + /* + * Pre-allocated members for storing information about equal key series for small-table matches. + * + * ~HashMapResultIndices + * Index into the hashMapResults array for the match. + * ~AllMatchIndices + * (Logical) indices into allMatchs to the first row of a match of a + * possible series of duplicate keys. + * ~IsSingleValue + * Whether there is 1 or multiple small table values. + * ~DuplicateCounts + * The duplicate count for each matched key. + * + */ + protected transient int[] equalKeySeriesHashMapResultIndices; + protected transient int[] equalKeySeriesAllMatchIndices; + protected transient boolean[] equalKeySeriesIsSingleValue; + protected transient int[] equalKeySeriesDuplicateCounts; // Pre-allocated member for storing the (physical) batch index of rows that need to be spilled. protected transient int[] spills; @@ -86,8 +105,11 @@ // Pre-allocated member for storing index into the hashSetResults for each spilled row. protected transient int[] spillHashMapResultIndices; - // Pre-allocated member for storing any non-matching row indexes during a processOp call. - protected transient int[] scratch1; + // Pre-allocated member for storing any non-spills, non-matches, or merged row indexes during a + // process method call.
+ protected transient int[] nonSpills; + protected transient int[] noMatchs; + protected transient int[] merged; public VectorMapJoinOuterGenerateResultOperator() { super(); @@ -111,12 +133,23 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - matchs = new int[batch.DEFAULT_SIZE]; - matchHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - nonSpills = new int[batch.DEFAULT_SIZE]; + + inputSelected = new int[batch.DEFAULT_SIZE]; + + allMatchs = new int[batch.DEFAULT_SIZE]; + + equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + spills = new int[batch.DEFAULT_SIZE]; spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - scratch1 = new int[batch.DEFAULT_SIZE]; + + nonSpills = new int[batch.DEFAULT_SIZE]; + noMatchs = new int[batch.DEFAULT_SIZE]; + merged = new int[batch.DEFAULT_SIZE]; + } //----------------------------------------------------------------------------------------------- @@ -126,260 +159,348 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { */ /** - * Generate the outer join output results for one vectorized row batch. - * - * Any filter expressions will apply now since hash map lookup for outer join is complete. + * Apply the value expression to rows in the (original) input selected array. * * @param batch - * The big table batch with any matching and any non matching rows both as - * selected in use. - * @param matchs - * A subset of the rows of the batch that are matches. - * @param matchHashMapResultIndices - * For each entry in matches, the index into the hashMapResult. - * @param matchSize - * Number of matches in matchs. - * @param nonSpills - * The rows of the batch that are both matches and non-matches. - * @param nonspillCount - * Number of rows in nonSpills. - * @param spills - * A subset of the rows of the batch that are spills. - * @param spillHashMapResultIndices - * For each entry in spills, the index into the hashMapResult. - * @param spillCount - * Number of spills in spills. - * @param hashMapResults - * The array of all hash map results for the batch. - * @param hashMapResultCount - * Number of entries in hashMapResults. - * @param scratch1 - * Pre-allocated storage to internal use. + * The vectorized row batch. + * @param inputSelectedInUse + * Whether the (original) input batch is selectedInUse. + * @param inputLogicalSize + * The (original) input batch size. */ - public int finishOuter(VectorizedRowBatch batch, - int[] matchs, int[] matchHashMapResultIndices, int matchCount, - int[] nonSpills, int nonSpillCount, - int[] spills, int[] spillHashMapResultIndices, int spillCount, - VectorMapJoinHashMapResult[] hashMapResults, int hashMapResultCount, - int[] scratch1) throws IOException, HiveException { - - int numSel = 0; - - // At this point we have determined the matching rows only for the ON equality condition(s). - // Implicitly, non-matching rows are those in the selected array minus matchs. + private void doValueExprOnInputSelected(VectorizedRowBatch batch, + boolean inputSelectedInUse, int inputLogicalSize) { - // Next, for outer join, apply any ON predicates to filter down the matches. 
- if (matchCount > 0 && bigTableFilterExpressions.length > 0) { + int saveBatchSize = batch.size; + int[] saveSelected = batch.selected; + boolean saveSelectedInUse = batch.selectedInUse; - System.arraycopy(matchs, 0, batch.selected, 0, matchCount); - batch.size = matchCount; + batch.size = inputLogicalSize; + batch.selected = inputSelected; + batch.selectedInUse = inputSelectedInUse; - // Non matches will be removed from the selected array. - for (VectorExpression ve : bigTableFilterExpressions) { + if (bigTableValueExpressions != null) { + for(VectorExpression ve: bigTableValueExpressions) { ve.evaluate(batch); } - - // LOG.info("finishOuter" + - // " filtered batch.selected " + Arrays.toString(Arrays.copyOfRange(batch.selected, 0, batch.size))); - - // Fixup the matchHashMapResultIndices array. - if (batch.size < matchCount) { - int numMatch = 0; - int[] selected = batch.selected; - for (int i = 0; i < batch.size; i++) { - if (selected[i] == matchs[numMatch]) { - matchHashMapResultIndices[numMatch] = matchHashMapResultIndices[i]; - numMatch++; - if (numMatch == matchCount) { - break; - } - } - } - System.arraycopy(batch.selected, 0, matchs, 0, matchCount); - } } - // LOG.info("finishOuter" + - // " matchs[" + matchCount + "] " + intArrayToRangesString(matchs, matchCount) + - // " matchHashMapResultIndices " + Arrays.toString(Arrays.copyOfRange(matchHashMapResultIndices, 0, matchCount))); - // Big table value expressions apply to ALL matching and non-matching rows. - if (bigTableValueExpressions != null) { + batch.size = saveBatchSize; + batch.selected = saveSelected; + batch.selectedInUse = saveSelectedInUse; + } + + /** + * Apply the value expression to rows specified by a selected array. + * + * @param batch + * The vectorized row batch. + * @param selected + * The (physical) batch indices to apply the expression to. + * @param size + * The size of selected. + */ + private void doValueExpr(VectorizedRowBatch batch, + int[] selected, int size) { - System.arraycopy(nonSpills, 0, batch.selected, 0, nonSpillCount); - batch.size = nonSpillCount; + int saveBatchSize = batch.size; + int[] saveSelected = batch.selected; + boolean saveSelectedInUse = batch.selectedInUse; - for (VectorExpression ve: bigTableValueExpressions) { + batch.size = size; + batch.selected = selected; + batch.selectedInUse = true; + + if (bigTableValueExpressions != null) { + for(VectorExpression ve: bigTableValueExpressions) { ve.evaluate(batch); } } - // Determine which rows are non matches by determining the delta between selected and - // matchs. - int[] noMatchs = scratch1; - int noMatchCount = 0; - if (matchCount < nonSpillCount) { - // Determine which rows are non matches. - int matchIndex = 0; - for (int i = 0; i < nonSpillCount; i++) { - int candidateIndex = nonSpills[i]; - if (matchIndex < matchCount && candidateIndex == matchs[matchIndex]) { - matchIndex++; - } else { - noMatchs[noMatchCount++] = candidateIndex; - } - } - } - // LOG.info("finishOuter" + - // " noMatchs[" + noMatchCount + "] " + intArrayToRangesString(noMatchs, noMatchCount)); + batch.size = saveBatchSize; + batch.selected = saveSelected; + batch.selectedInUse = saveSelectedInUse; + } + /** + * Remove (subtract) members from the input selected array and produce the results into + * a difference array. + * + * @param inputSelectedInUse + * Whether the (original) input batch is selectedInUse. + * @param inputLogicalSize + * The (original) input batch size. + * @param remove + * The indices to remove. 
They must all be present in the input selected array. + * @param removeSize + * The size of remove. + * @param difference + * The resulting difference -- the input selected array indices not in the + * remove array. + * @return + * The resulting size of the difference array. + * @throws HiveException + */ + private int subtractFromInputSelected(boolean inputSelectedInUse, int inputLogicalSize, + int[] remove, int removeSize, int[] difference) throws HiveException { - // When we generate results into the overflow batch, we may still end up with fewer rows - // in the big table batch. So, nulSel and the batch's selected array will be rebuilt with - // just the big table rows that need to be forwarded, minus any rows processed with the - // overflow batch. - if (matchCount > 0) { - numSel = generateOuterHashMapMatchResults(batch, - matchs, matchHashMapResultIndices, matchCount, - hashMapResults, numSel); - } + // if (!verifyMonotonicallyIncreasing(remove, removeSize)) { + // throw new HiveException("remove is not in sort order and unique"); + // } - if (noMatchCount > 0) { - numSel = generateOuterHashMapNoMatchResults(batch, noMatchs, noMatchCount, numSel); + int differenceCount = 0; + + // Determine which rows are left. + int removeIndex = 0; + if (inputSelectedInUse) { + for (int i = 0; i < inputLogicalSize; i++) { + int candidateIndex = inputSelected[i]; + if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) { + removeIndex++; + } else { + difference[differenceCount++] = candidateIndex; + } + } + } else { + for (int candidateIndex = 0; candidateIndex < inputLogicalSize; candidateIndex++) { + if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) { + removeIndex++; + } else { + difference[differenceCount++] = candidateIndex; + } + } + } + + if (removeIndex != removeSize) { + throw new HiveException("Not all batch indices removed"); + } + + // if (!verifyMonotonicallyIncreasing(difference, differenceCount)) { + // throw new HiveException("difference is not in sort order and unique"); + // } + + return differenceCount; + } + + /** + * Remove (subtract) members from an array and produce the results into + * a difference array. + * + * @param all + * The selected array containing all members. + * @param allSize + * The size of all. + * @param remove + * The indices to remove. They must all be present in the all array. + * @param removeSize + * The size of remove. + * @param difference + * The resulting difference -- the all array indices not in the + * remove array. + * @return + * The resulting size of the difference array. + * @throws HiveException + */ + private int subtract(int[] all, int allSize, + int[] remove, int removeSize, int[] difference) throws HiveException { + + // if (!verifyMonotonicallyIncreasing(remove, removeSize)) { + // throw new HiveException("remove is not in sort order and unique"); + // } + + int differenceCount = 0; + + // Determine which rows are left.
+ int removeIndex = 0; + for (int i = 0; i < allSize; i++) { + int candidateIndex = all[i]; + if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) { + removeIndex++; + } else { + difference[differenceCount++] = candidateIndex; + } } - if (spillCount > 0) { - spillHashMapBatch(batch, (VectorMapJoinHashTableResult[]) hashMapResults, - spills, spillHashMapResultIndices, spillCount); + if (removeIndex != removeSize) { + throw new HiveException("Not all batch indices removed"); } - return numSel; + return differenceCount; } - /** - * Generate the matching outer join output results for one row of a vectorized row batch into - * the overflow batch. - * - * @param batch - * The big table batch. - * @param batchIndex - * Index of the big table row. - * @param hashMapResult - * The hash map result with the small table values. - */ - private void copyOuterHashMapResultToOverflow(VectorizedRowBatch batch, int batchIndex, - VectorMapJoinHashMapResult hashMapResult) throws HiveException, IOException { - - // if (hashMapResult.isCappedCountAvailable()) { - // LOG.info("copyOuterHashMapResultToOverflow cappedCount " + hashMapResult.cappedCount()); - // } - ByteSegmentRef byteSegmentRef = hashMapResult.first(); - while (byteSegmentRef != null) { - - // Copy the BigTable values into the overflow batch. Since the overflow batch may - // not get flushed here, we must copy by value. - if (bigTableRetainedVectorCopy != null) { - bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, - overflowBatch, overflowBatch.size); - } - - // Reference the keys we just copied above. - if (bigTableVectorCopyOuterKeys != null) { - bigTableVectorCopyOuterKeys.copyByReference(overflowBatch, overflowBatch.size, - overflowBatch, overflowBatch.size); - } + /** + * Sort merge two select arrays so the resulting array is ordered by (batch) index. + * + * @param selected1 + * @param selected1Count + * @param selected2 + * @param selected2Count + * @param sortMerged + * The resulting sort merge of selected1 and selected2. + * @return + * The resulting size of the sortMerged array. 
+ * @throws HiveException + */ + private int sortMerge(int[] selected1, int selected1Count, + int[] selected2, int selected2Count, int[] sortMerged) throws HiveException { - if (smallTableVectorDeserializeRow != null) { + // if (!verifyMonotonicallyIncreasing(selected1, selected1Count)) { + // throw new HiveException("selected1 is not in sort order and unique"); + // } - byte[] bytes = byteSegmentRef.getBytes(); - int offset = (int) byteSegmentRef.getOffset(); - int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); + // if (!verifyMonotonicallyIncreasing(selected2, selected2Count)) { + // throw new HiveException("selected2 is not in sort order and unique"); + // } - smallTableVectorDeserializeRow.deserializeByValue(overflowBatch, overflowBatch.size); - } - ++overflowBatch.size; - if (overflowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { - forwardOverflow(); - } int sortMergeCount = 0; - byteSegmentRef = hashMapResult.next(); + int selected1Index = 0; + int selected2Index = 0; + for (int i = 0; i < selected1Count + selected2Count; i++) { + if (selected1Index < selected1Count && selected2Index < selected2Count) { + if (selected1[selected1Index] < selected2[selected2Index]) { + sortMerged[sortMergeCount++] = selected1[selected1Index++]; + } else { + sortMerged[sortMergeCount++] = selected2[selected2Index++]; + } + } else if (selected1Index < selected1Count) { + sortMerged[sortMergeCount++] = selected1[selected1Index++]; + } else { + sortMerged[sortMergeCount++] = selected2[selected2Index++]; } - // LOG.info("copyOuterHashMapResultToOverflow overflowBatch.size " + overflowBatch.size); + } - } + // if (!verifyMonotonicallyIncreasing(sortMerged, sortMergeCount)) { + // throw new HiveException("sortMerged is not in sort order and unique"); + // } - /** - * Generate the matching outer join output results for one vectorized row batch. - * - * For each matching row specified by parameter, get the one or more small table values and - * form join results. - * - * (Note: Since all matching and non-matching rows are selected and output for outer joins, - * we cannot use selected as the matching rows). - * - * @param batch - * The big table batch with any matching and any non matching rows both as - * selected in use. - * @param matchs - * A subset of the rows of the batch that are matches. - * @param matchHashMapResultIndices - * For each entry in matches, the index into the hashMapResult. - * @param matchSize - * Number of matches in matchs. - * @param hashMapResults - * The array of all hash map results for the batch. - * @param numSel - * The current count of rows in the rebuilding of the selected array. - * - * @return - * The new count of selected rows. - */ - protected int generateOuterHashMapMatchResults(VectorizedRowBatch batch, - int[] matchs, int[] matchHashMapResultIndices, int matchSize, - VectorMapJoinHashMapResult[] hashMapResults, int numSel) - throws IOException, HiveException { + return sortMergeCount; + } - int[] selected = batch.selected; + /** + * Generate the outer join output results for one vectorized row batch. + * + * @param batch + * The big table batch with any matching and any non matching rows both as + * selected in use. + * @param allMatchCount + * Number of matches in allMatchs. + * @param equalKeySeriesCount + * Number of equal key series. + * @param atLeastOneNonMatch + * Whether at least one row was a non-match.
+ * @param inputSelectedInUse + * A copy of the batch's selectedInUse flag on input to the process method. + * @param inputLogicalSize + * The batch's size on input to the process method. + * @param spillCount + * Number of spills in spills. + * @param hashMapResultCount + * Number of entries in hashMapResults. + */ + public void finishOuter(VectorizedRowBatch batch, + int allMatchCount, int equalKeySeriesCount, boolean atLeastOneNonMatch, + boolean inputSelectedInUse, int inputLogicalSize, + int spillCount, int hashMapResultCount) throws IOException, HiveException { - // Generate result within big table batch when single small table value. Otherwise, copy - // to overflow batch. + // LOG.debug("finishOuter" + + // " matchs[" + matchCount + "] " + intArrayToRangesString(matchs, matchCount) + + // " matchHashMapResultIndices " + Arrays.toString(Arrays.copyOfRange(matchHashMapResultIndices, 0, matchCount))); - for (int i = 0; i < matchSize; i++) { - int batchIndex = matchs[i]; + // Get rid of spills before we start modifying the batch. + if (spillCount > 0) { + spillHashMapBatch(batch, (VectorMapJoinHashTableResult[]) hashMapResults, + spills, spillHashMapResultIndices, spillCount); + } - int hashMapResultIndex = matchHashMapResultIndices[i]; - VectorMapJoinHashMapResult hashMapResult = hashMapResults[hashMapResultIndex]; + int noMatchCount = 0; + if (spillCount > 0) { - if (!hashMapResult.isSingleRow()) { + // Subtract the spills to get all match and non-match rows. + int nonSpillCount = subtractFromInputSelected( + inputSelectedInUse, inputLogicalSize, spills, spillCount, nonSpills); - // Multiple small table rows require use of the overflow batch. - copyOuterHashMapResultToOverflow(batch, batchIndex, hashMapResult); - } else { + // Big table value expressions apply to ALL matching and non-matching rows. + if (bigTableValueExpressions != null) { + + doValueExpr(batch, nonSpills, nonSpillCount); + + } - // Generate join result in big table batch. - ByteSegmentRef byteSegmentRef = hashMapResult.first(); + if (atLeastOneNonMatch) { + noMatchCount = subtract(nonSpills, nonSpillCount, allMatchs, allMatchCount, + noMatchs); + } - if (bigTableVectorCopyOuterKeys != null) { - bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex); - } + } else { - if (smallTableVectorDeserializeRow != null) { - byte[] bytes = byteSegmentRef.getBytes(); - int offset = (int) byteSegmentRef.getOffset(); - int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); + // Run value expressions over original (whole) input batch. + doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize); - smallTableVectorDeserializeRow.deserializeByValue(batch, batchIndex); - } + if (atLeastOneNonMatch) { + noMatchCount = subtractFromInputSelected( + inputSelectedInUse, inputLogicalSize, allMatchs, allMatchCount, noMatchs); + } + } - // Remember this big table row was used for an output result. - selected[numSel++] = batchIndex; - } - } - return numSel; - } + LOG.debug("finishOuter" + + " noMatchs[" + noMatchCount + "] " + intArrayToRangesString(noMatchs, noMatchCount)); + // When we generate results into the overflow batch, we may still end up with fewer rows + // in the big table batch. So, numSel and the batch's selected array will be rebuilt with + // just the big table rows that need to be forwarded, minus any rows processed with the + // overflow batch.
+ if (allMatchCount > 0) { + + int numSel = 0; + for (int i = 0; i < equalKeySeriesCount; i++) { + int hashMapResultIndex = equalKeySeriesHashMapResultIndices[i]; + VectorMapJoinHashMapResult hashMapResult = hashMapResults[hashMapResultIndex]; + int allMatchesIndex = equalKeySeriesAllMatchIndices[i]; + boolean isSingleValue = equalKeySeriesIsSingleValue[i]; + int duplicateCount = equalKeySeriesDuplicateCounts[i]; + + if (isSingleValue) { + numSel = generateHashMapResultSingleValue( + batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount, numSel); + } else { + generateHashMapResultMultiValue( + batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount); + } + } - smallTableVectorDeserializeRow.deserializeByValue(batch, batchIndex); - } + // The number of single value rows that were generated in the big table batch. + batch.size = numSel; + batch.selectedInUse = true; + } else { + batch.size = 0; + } - // Remember this big table row was used for an output result. - selected[numSel++] = batchIndex; - } - } - return numSel; - } + if (noMatchCount > 0) { + if (batch.size > 0) { + + generateOuterNulls(batch, noMatchs, noMatchCount); + + // Merge noMatchs and (match) selected. + int mergeCount = sortMerge( + noMatchs, noMatchCount, batch.selected, batch.size, merged); + + System.arraycopy(merged, 0, batch.selected, 0, mergeCount); + batch.size = mergeCount; + batch.selectedInUse = true; + } else { + + // We can use the whole batch for output of no matches. + + generateOuterNullsRepeatedAll(batch); + + System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount); + batch.size = noMatchCount; + batch.selectedInUse = true; + } + } + } /** * Generate the non matching outer join output results for one vectorized row batch. @@ -393,72 +514,30 @@ protected int generateOuterHashMapMatchResults(VectorizedRowBatch batch, * A subset of the rows of the batch that are non matches. * @param noMatchSize * Number of non matches in noMatchs. - * @param numSel - * The current count of rows in the rebuilding of the selected array. - * - * @return - * The new count of selected rows. */ - protected int generateOuterHashMapNoMatchResults(VectorizedRowBatch batch, int[] noMatchs, - int noMatchSize, int numSel) throws IOException, HiveException { - int[] selected = batch.selected; - - // Generate result within big table batch with null small table results, using isRepeated - // if possible. + protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs, + int noMatchSize) throws IOException, HiveException { - if (numSel == 0) { + // Set null information in the small table results area. - // There were 0 matching rows -- so we can use the isRepeated optimization for the non - // matching rows. + for (int i = 0; i < noMatchSize; i++) { + int batchIndex = noMatchs[i]; // Mark any scratch small table scratch columns that would normally receive a copy of the - // key as null and repeating. + // key as null, too. for (int column : bigTableOuterKeyMapping.getOutputColumns()) { ColumnVector colVector = batch.cols[column]; - colVector.isRepeating = true; colVector.noNulls = false; - colVector.isNull[0] = true; + colVector.isNull[batchIndex] = true; } - // Small table values are set to null and repeating. + // Small table values are set to null. for (int column : smallTableMapping.getOutputColumns()) { ColumnVector colVector = batch.cols[column]; - colVector.isRepeating = true; colVector.noNulls = false; - colVector.isNull[0] = true; - } - - // Rebuild the selected array. 
- for (int i = 0; i < noMatchSize; i++) {
- int batchIndex = noMatchs[i];
- selected[numSel++] = batchIndex;
- }
- } else {
-
- // Set null information in the small table results area.
-
- for (int i = 0; i < noMatchSize; i++) {
- int batchIndex = noMatchs[i];
-
- // Mark any scratch small table scratch columns that would normally receive a copy of the
- // key as null, too.
- for (int column : bigTableOuterKeyMapping.getOutputColumns()) {
- ColumnVector colVector = batch.cols[column];
- colVector.noNulls = false;
- colVector.isNull[batchIndex] = true;
- }
-
- // Small table values are set to null.
- for (int column : smallTableMapping.getOutputColumns()) {
- ColumnVector colVector = batch.cols[column];
- colVector.noNulls = false;
- colVector.isNull[batchIndex] = true;
- }
-
- selected[numSel++] = batchIndex;
+ colVector.isNull[batchIndex] = true;
}
}
- return numSel;
}

/**
@@ -473,65 +552,114 @@ protected int generateOuterHashMapNoMatchResults(VectorizedRowBatch batch, int[]
* The hash map lookup result for the repeated key.
* @param hashMapResults
* The array of all hash map results for the batch.
+ * @param someRowsFilteredOut
+ * Whether some rows of the repeated key batch were knocked out by the filter.
+ * @param inputSelectedInUse
+ * A copy of the batch's selectedInUse flag on input to the process method.
+ * @param inputLogicalSize
+ * The batch's size on input to the process method.
- * @param scratch1
- * Pre-allocated storage to internal use.
*/
- public int finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
- VectorMapJoinHashMapResult hashMapResult, int[] scratch1)
+ public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+ VectorMapJoinHashMapResult hashMapResult, boolean someRowsFilteredOut,
+ boolean inputSelectedInUse, int inputLogicalSize)
throws IOException, HiveException {
- int numSel = 0;
+ // LOG.debug("finishOuterRepeated batch #" + batchCounter + " " + joinResult.name() + " batch.size " + batch.size + " someRowsFilteredOut " + someRowsFilteredOut);
- if (joinResult == JoinUtil.JoinResult.MATCH && bigTableFilterExpressions.length > 0) {
+ switch (joinResult) {
+ case MATCH:
- // Since it is repeated, the evaluation of the filter will knock the whole batch out.
- // But since we are doing outer join, we want to keep non-matches.
+ // Rows we looked up as one repeated key are a match. But filtered out rows
+ // need to be generated as non-matches, too.
- // First, remember selected;
- int[] rememberSelected = scratch1;
- int rememberBatchSize = batch.size;
- if (batch.selectedInUse) {
- System.arraycopy(batch.selected, 0, rememberSelected, 0, batch.size);
- }
+ if (someRowsFilteredOut) {
- // Filter.
- for (VectorExpression ve : bigTableFilterExpressions) {
- ve.evaluate(batch);
- }
+ // For the filtered out rows that didn't (logically) get looked up in the hash table,
+ // we need to generate no match results for those too...
- // Convert a filter out to a non match.
- if (batch.size == 0) { - joinResult = JoinUtil.JoinResult.NOMATCH; - if (batch.selectedInUse) { - System.arraycopy(rememberSelected, 0, batch.selected, 0, rememberBatchSize); - // LOG.info("finishOuterRepeated batch #" + batchCounter + " filter out converted to no matchs " + - // Arrays.toString(Arrays.copyOfRange(batch.selected, 0, rememberBatchSize))); - } else { - // LOG.info("finishOuterRepeated batch #" + batchCounter + " filter out converted to no matchs batch size " + - // rememberBatchSize); - } - batch.size = rememberBatchSize; - } - } + // Run value expressions over original (whole) input batch. + doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize); - // LOG.info("finishOuterRepeated batch #" + batchCounter + " " + joinResult.name() + " batch.size " + batch.size); - switch (joinResult) { - case MATCH: - // Run our value expressions over whole batch. - if (bigTableValueExpressions != null) { - for(VectorExpression ve: bigTableValueExpressions) { - ve.evaluate(batch); + // Now calculate which rows were filtered out (they are logically no matches). + + // Determine which rows are non matches by determining the delta between inputSelected and + // (current) batch selected. + + int noMatchCount = subtractFromInputSelected( + inputSelectedInUse, inputLogicalSize, batch.selected, batch.size, noMatchs); + + generateOuterNulls(batch, noMatchs, noMatchCount); + + // Now generate the matchs. Single small table values will be put into the big table + // batch and come back in matchs. Any multiple small table value results will go into + // the overflow batch. + generateHashMapResultRepeatedAll(batch, hashMapResult); + + // Merge noMatchs and (match) selected. + int mergeCount = sortMerge( + noMatchs, noMatchCount, batch.selected, batch.size, merged); + + System.arraycopy(merged, 0, batch.selected, 0, mergeCount); + batch.size = mergeCount; + batch.selectedInUse = true; + } else { + + // Just run our value expressions over input batch. + + if (bigTableValueExpressions != null) { + for(VectorExpression ve: bigTableValueExpressions) { + ve.evaluate(batch); + } } - } - // Use a common method applicable for inner and outer. - numSel = generateHashMapResultRepeatedAll(batch, hashMapResult); + generateHashMapResultRepeatedAll(batch, hashMapResult); + } break; + case SPILL: - // Whole batch is spilled. + + // Rows we looked up as one repeated key need to spill. But filtered out rows + // need to be generated as non-matches, too. + spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResult); + + // After using selected to generate spills, generate non-matches, if any. + if (someRowsFilteredOut) { + + // Determine which rows are non matches by determining the delta between inputSelected and + // (current) batch selected. + + int noMatchCount = subtractFromInputSelected( + inputSelectedInUse, inputLogicalSize, batch.selected, batch.size, noMatchs); + + System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount); + batch.size = noMatchCount; + batch.selectedInUse = true; + + generateOuterNullsRepeatedAll(batch); + } else { + batch.size = 0; + } + break; + case NOMATCH: + + if (someRowsFilteredOut) { + + // When the repeated no match is due to filtering, we need to restore the + // selected information. + + if (inputSelectedInUse) { + System.arraycopy(inputSelected, 0, batch.selected, 0, inputLogicalSize); + } + batch.size = inputLogicalSize; + } + // Run our value expressions over whole batch. 
if (bigTableValueExpressions != null) { for(VectorExpression ve: bigTableValueExpressions) { @@ -539,11 +667,9 @@ public int finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joi } } - numSel = generateOuterNullsRepeatedAll(batch); + generateOuterNullsRepeatedAll(batch); break; } - - return numSel; } /** @@ -554,24 +680,8 @@ public int finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joi * * @param batch * The big table batch. - * @return - * The new count of selected rows. */ - protected int generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException { - - int[] selected = batch.selected; - boolean selectedInUse = batch.selectedInUse; - - // Generate result within big table batch using is repeated for null small table results. - - if (batch.selectedInUse) { - // The selected array is already filled in as we want it. - } else { - for (int i = 0; i < batch.size; i++) { - selected[i] = i; - } - batch.selectedInUse = true; - } + protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException { for (int column : smallTableMapping.getOutputColumns()) { ColumnVector colVector = batch.cols[column]; @@ -588,12 +698,5 @@ protected int generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hiv colVector.isNull[0] = true; colVector.isRepeating = true; } - - // for (int i = 0; i < batch.size; i++) { - // int bigTableIndex = selected[i]; - // VectorizedBatchUtil.debugDisplayOneRow(batch, bigTableIndex, taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator generate generateOuterNullsRepeatedAll batch"); - // } - - return batch.size; } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index 8f18672..e6da04f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -24,7 +24,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -123,9 +125,6 @@ public void process(Object row, int tag) throws HiveException { batchCounter++; - // For outer join, DO NOT apply filters yet. It is incorrect for outer join to - // apply the filter before hash table matching. - final int inputLogicalSize = batch.size; if (inputLogicalSize == 0) { @@ -135,6 +134,27 @@ public void process(Object row, int tag) throws HiveException { return; } + // For outer join, remember our input rows before ON expression filtering or before + // hash table matching so we can generate results for all rows (matching and non matching) + // later. 
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
+ // Since the input selected array was saved above, the filters may modify batch.selected in place.
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -142,9 +162,6 @@ public void process(Object row, int tag) throws HiveException {
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column Long specific declarations.
*/
@@ -174,12 +191,16 @@ public void process(Object row, int tag) throws HiveException {
*/
JoinUtil.JoinResult joinResult;
- if (!joinColVector.noNulls && joinColVector.isNull[0]) {
- // Null key is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// Handle *repeated* join key, if found.
long key = vector[0];
+ LOG.debug(CLASS_NAME + " repeated key " + key);
if (useMinMax && (key < min || key > max)) {
// Out of range for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
@@ -195,7 +216,8 @@ public void process(Object row, int tag) throws HiveException {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -209,14 +231,13 @@ public void process(Object row, int tag) throws HiveException {
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Single-Column Long specific variables.
*/
@@ -228,9 +249,11 @@ public void process(Object row, int tag) throws HiveException {
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
/*
* Single-Column Long outer null detection.
*/ @@ -246,8 +269,8 @@ public void process(Object row, int tag) throws HiveException { // Let a current SPILL equal key series keep going, or // Let a current NOMATCH keep not matching. - // Remember non-matches for Outer Join. - nonSpills[nonSpillCount++] = batchIndex; + atLeastOneNonMatch = true; + // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { @@ -265,9 +288,12 @@ public void process(Object row, int tag) throws HiveException { // New key. if (haveSaveKey) { - // Move on with our count(s). + // Move on with our counts. switch (saveJoinResult) { case MATCH: + hashMapResultCount++; + equalKeySeriesCount++; + break; case SPILL: hashMapResultCount++; break; @@ -296,41 +322,70 @@ public void process(Object row, int tag) throws HiveException { } else { saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]); } - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name()); - } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name()); - } - /* - * Common outer join result processing. - */ + // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name()); + + /* + * Common outer join result processing. + */ - switch (saveJoinResult) { - case MATCH: - matchs[matchCount] = batchIndex; - matchHashMapResultIndices[matchCount] = hashMapResultCount; - matchCount++; - nonSpills[nonSpillCount++] = batchIndex; - break; - - case SPILL: - spills[spillCount] = batchIndex; - spillHashMapResultIndices[spillCount] = hashMapResultCount; - spillCount++; - break; - - case NOMATCH: - nonSpills[nonSpillCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); - break; + switch (saveJoinResult) { + case MATCH: + equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; + equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; + equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); + equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; + allMatchs[allMatchCount++] = batchIndex; + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); + break; + + case SPILL: + spills[spillCount] = batchIndex; + spillHashMapResultIndices[spillCount] = hashMapResultCount; + spillCount++; + break; + + case NOMATCH: + atLeastOneNonMatch = true; + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); + break; + } + } else { + // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); + + // Series of equal keys. 
+ + switch (saveJoinResult) { + case MATCH: + equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; + allMatchs[allMatchCount++] = batchIndex; + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); + break; + + case SPILL: + spills[spillCount] = batchIndex; + spillHashMapResultIndices[spillCount] = hashMapResultCount; + spillCount++; + break; + + case NOMATCH: + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); + break; + } } + // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { + // throw new HiveException("allMatchs is not in sort order and unique"); + // } } } if (haveSaveKey) { - // Account for last equal key sequence. + // Update our counts for the last key. switch (saveJoinResult) { case MATCH: + hashMapResultCount++; + equalKeySeriesCount++; + break; case SPILL: hashMapResultCount++; break; @@ -341,27 +396,26 @@ public void process(Object row, int tag) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " batch #" + batchCounter + - " matchs " + intArrayToRangesString(matchs, matchCount) + - " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) + - " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) + + " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) + + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + + " atLeastOneNonMatch " + atLeastOneNonMatch + + " inputSelectedInUse " + inputSelectedInUse + + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount))); } // We will generate results for all matching and non-matching rows. - // Note that scratch1 is undefined at this point -- it's preallocated storage. - numSel = finishOuter(batch, - matchs, matchHashMapResultIndices, matchCount, - nonSpills, nonSpillCount, - spills, spillHashMapResultIndices, spillCount, - hashMapResults, hashMapResultCount, - scratch1); + finishOuter(batch, + allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, + inputSelectedInUse, inputLogicalSize, + spillCount, hashMapResultCount); } - batch.selectedInUse = true; - batch.size = numSel; - if (batch.size > 0) { // Forward any remaining selected rows. forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index ffee959..46b9564 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -128,9 +128,6 @@ public void process(Object row, int tag) throws HiveException { batchCounter++; - // For outer join, DO NOT apply filters yet. 
It is incorrect for outer join to
- apply the filter before hash table matching.
-
final int inputLogicalSize = batch.size;

if (inputLogicalSize == 0) {
@@ -140,6 +137,27 @@ public void process(Object row, int tag) throws HiveException {
return;
}

+ // For outer join, remember our input rows before ON expression filtering or before
+ // hash table matching so we can generate results for all rows (matching and non matching)
+ // later.
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
+ // Since the input selected array was saved above, the filters may modify batch.selected in place.
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -147,9 +165,6 @@ public void process(Object row, int tag) throws HiveException {
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Multi-Key specific declarations.
*/
@@ -195,8 +210,11 @@ public void process(Object row, int tag) throws HiveException {
*/
JoinUtil.JoinResult joinResult;
- if (someKeyInputColumnIsNull) {
- // Any null key column is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (someKeyInputColumnIsNull) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
@@ -215,7 +233,8 @@ public void process(Object row, int tag) throws HiveException {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -229,14 +248,13 @@ public void process(Object row, int tag) throws HiveException {
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Multi-Key specific variables.
*/
@@ -248,9 +266,11 @@ public void process(Object row, int tag) throws HiveException {
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ?
selected[logical] : logical); + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); + /* * Multi-Key outer null detection. */ @@ -268,8 +288,8 @@ public void process(Object row, int tag) throws HiveException { // Let a current SPILL equal key series keep going, or // Let a current NOMATCH keep not matching. - // Remember non-matches for Outer Join. - nonSpills[nonSpillCount++] = batchIndex; + atLeastOneNonMatch = true; + // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { @@ -288,9 +308,12 @@ public void process(Object row, int tag) throws HiveException { // New key. if (haveSaveKey) { - // Move on with our count(s). + // Move on with our counts. switch (saveJoinResult) { case MATCH: + hashMapResultCount++; + equalKeySeriesCount++; + break; case SPILL: hashMapResultCount++; break; @@ -318,41 +341,68 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = saveKeyOutput.getData(); int keyLength = saveKeyOutput.getLength(); saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]); - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name()); - } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name()); - } - /* - * Common outer join result processing. - */ + /* + * Common outer join result processing. + */ - switch (saveJoinResult) { - case MATCH: - matchs[matchCount] = batchIndex; - matchHashMapResultIndices[matchCount] = hashMapResultCount; - matchCount++; - nonSpills[nonSpillCount++] = batchIndex; - break; - - case SPILL: - spills[spillCount] = batchIndex; - spillHashMapResultIndices[spillCount] = hashMapResultCount; - spillCount++; - break; - - case NOMATCH: - nonSpills[nonSpillCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); - break; + switch (saveJoinResult) { + case MATCH: + equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; + equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; + equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); + equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; + allMatchs[allMatchCount++] = batchIndex; + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); + break; + + case SPILL: + spills[spillCount] = batchIndex; + spillHashMapResultIndices[spillCount] = hashMapResultCount; + spillCount++; + break; + + case NOMATCH: + atLeastOneNonMatch = true; + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); + break; + } + } else { + // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); + + // Series of equal keys. 
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+ break;
+ }
+ }
+ // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+ // throw new HiveException("allMatchs is not in sort order and unique");
+ // }
}
}

if (haveSaveKey) {
- // Account for last equal key sequence.
+ // Update our counts for the last key.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -363,27 +413,26 @@
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter +
- " matchs " + intArrayToRangesString(matchs, matchCount) +
- " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
- " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+ " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+ " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+ " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+ " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+ " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+ " atLeastOneNonMatch " + atLeastOneNonMatch +
+ " inputSelectedInUse " + inputSelectedInUse +
+ " inputLogicalSize " + inputLogicalSize +
" spills " + intArrayToRangesString(spills, spillCount) +
" spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}

// We will generate results for all matching and non-matching rows.
- // Note that scratch1 is undefined at this point -- it's preallocated storage.
- numSel = finishOuter(batch,
- matchs, matchHashMapResultIndices, matchCount,
- nonSpills, nonSpillCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount,
- scratch1);
+ finishOuter(batch,
+ allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+ inputSelectedInUse, inputLogicalSize,
+ spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
index 5167c19..1c6e3dd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
@@ -115,9 +115,6 @@ public void process(Object row, int tag) throws HiveException {
batchCounter++;
- // For outer join, DO NOT apply filters yet.
It is incorrect for outer join to
- apply the filter before hash table matching.
-
final int inputLogicalSize = batch.size;

if (inputLogicalSize == 0) {
@@ -127,6 +124,27 @@ public void process(Object row, int tag) throws HiveException {
return;
}

+ // For outer join, remember our input rows before ON expression filtering or before
+ // hash table matching so we can generate results for all rows (matching and non matching)
+ // later.
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
+ // Since the input selected array was saved above, the filters may modify batch.selected in place.
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -134,9 +152,6 @@ public void process(Object row, int tag) throws HiveException {
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column String specific declarations.
*/
@@ -168,8 +183,11 @@ public void process(Object row, int tag) throws HiveException {
*/
JoinUtil.JoinResult joinResult;
- if (!joinColVector.noNulls && joinColVector.isNull[0]) {
- // Null key is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// Handle *repeated* join key, if found.
@@ -186,7 +204,8 @@ public void process(Object row, int tag) throws HiveException {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -200,14 +219,13 @@ public void process(Object row, int tag) throws HiveException {
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Single-Column String specific variables.
*/
@@ -219,9 +237,11 @@ public void process(Object row, int tag) throws HiveException {
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ?
selected[logical] : logical); + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); + /* * Single-Column String outer null detection. */ @@ -237,8 +257,8 @@ public void process(Object row, int tag) throws HiveException { // Let a current SPILL equal key series keep going, or // Let a current NOMATCH keep not matching. - // Remember non-matches for Outer Join. - nonSpills[nonSpillCount++] = batchIndex; + atLeastOneNonMatch = true; + // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { @@ -258,9 +278,12 @@ public void process(Object row, int tag) throws HiveException { // New key. if (haveSaveKey) { - // Move on with our count(s). + // Move on with our counts. switch (saveJoinResult) { case MATCH: + hashMapResultCount++; + equalKeySeriesCount++; + break; case SPILL: hashMapResultCount++; break; @@ -286,43 +309,69 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[batchIndex]; int keyStart = start[batchIndex]; int keyLength = length[batchIndex]; - saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]); - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name()); - } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name()); - } - /* - * Common outer join result processing. - */ + /* + * Common outer join result processing. + */ - switch (saveJoinResult) { - case MATCH: - matchs[matchCount] = batchIndex; - matchHashMapResultIndices[matchCount] = hashMapResultCount; - matchCount++; - nonSpills[nonSpillCount++] = batchIndex; - break; - - case SPILL: - spills[spillCount] = batchIndex; - spillHashMapResultIndices[spillCount] = hashMapResultCount; - spillCount++; - break; - - case NOMATCH: - nonSpills[nonSpillCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); - break; + switch (saveJoinResult) { + case MATCH: + equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; + equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; + equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); + equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; + allMatchs[allMatchCount++] = batchIndex; + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); + break; + + case SPILL: + spills[spillCount] = batchIndex; + spillHashMapResultIndices[spillCount] = hashMapResultCount; + spillCount++; + break; + + case NOMATCH: + atLeastOneNonMatch = true; + // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); + break; + } + } else { + // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); + + // Series of equal keys. 
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+ break;
+ }
+ }
+ // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+ // throw new HiveException("allMatchs is not in sort order and unique");
+ // }
}
}

if (haveSaveKey) {
- // Account for last equal key sequence.
+ // Update our counts for the last key.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -333,27 +382,26 @@
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter +
- " matchs " + intArrayToRangesString(matchs, matchCount) +
- " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
- " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+ " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+ " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+ " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+ " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+ " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+ " atLeastOneNonMatch " + atLeastOneNonMatch +
+ " inputSelectedInUse " + inputSelectedInUse +
+ " inputLogicalSize " + inputLogicalSize +
" spills " + intArrayToRangesString(spills, spillCount) +
" spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}

// We will generate results for all matching and non-matching rows.
- // Note that scratch1 is undefined at this point -- it's preallocated storage.
- numSel = finishOuter(batch,
- matchs, matchHashMapResultIndices, matchCount,
- nonSpills, nonSpillCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount,
- scratch1);
+ finishOuter(batch,
+ allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+ inputSelectedInUse, inputLogicalSize,
+ spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java index c8359d3..8e5fb78 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java @@ -91,7 +91,7 @@ private void setupOutputFileStreams() throws IOException { } tmpFile = File.createTempFile("BytesContainer", ".tmp", parentFile); - LOG.info("BytesContainer created temp file " + tmpFile.getAbsolutePath()); + LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath()); tmpFile.deleteOnExit(); fileOutputStream = new FileOutputStream(tmpFile); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index 0796406..865a98c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -54,13 +54,13 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength); - // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); + // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); keysAssigned++; } else { // Add another value. 
- // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); + // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength); - // LOG.info("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); + // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } } @@ -77,7 +77,7 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, if (valueRefWord == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - // LOG.info("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null)); + // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null)); optimizedHashMapResult.set(valueStore, valueRefWord); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index d685c22..c028083 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -49,11 +49,11 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Count. - // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); + // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); keysAssigned++; } else { // Add another value. 
- // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); + // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2]++; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index 594a77f..5dc78ff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -79,13 +79,13 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr while (true) { int tripleIndex = 3 * slot; if (slotTriples[tripleIndex] == 0) { - // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty"); + // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty"); isNewKey = true;; break; } if (hashCode == slotTriples[tripleIndex + 1] && keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) { - // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing"); + // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing"); isNewKey = false; break; } @@ -155,7 +155,7 @@ private void expandAndRehash() { } // Use old value reference word. 
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")"); + // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")"); newSlotTriples[newTripleIndex] = keyRef; newSlotTriples[newTripleIndex + 1] = hashCode; @@ -170,7 +170,7 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); + // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } protected long findReadSlot(byte[] keyBytes, int keyStart, int keyLength, long hashCode) { @@ -181,7 +181,7 @@ protected long findReadSlot(byte[] keyBytes, int keyStart, int keyLength, long h int i = 0; while (true) { int tripleIndex = slot * 3; - // LOG.info("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); + // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); if (slotTriples[tripleIndex] != 0 && hashCode == slotTriples[tripleIndex + 1]) { // Finally, verify the key bytes match. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java index f2f42ee..9d95d05 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java @@ -112,7 +112,7 @@ public long add(byte[] keyBytes, int keyStart, int keyLength) { } keyRefWord |= absoluteKeyOffset; - // LOG.info("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord)); + // LOG.debug("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord)); return keyRefWord; } @@ -122,7 +122,7 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn); - // LOG.info("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord)); + // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord)); if (isKeyLengthSmall && storedKeyLengthLength != keyLength) { return false; @@ -135,7 +135,7 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL // Read big value length we wrote with the value. 
storedKeyLengthLength = writeBuffers.readVInt(readPos); if (storedKeyLengthLength != keyLength) { - // LOG.info("VectorMapJoinFastKeyStore equalKey no match big length"); + // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length"); return false; } } @@ -148,11 +148,11 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL for (int i = 0; i < keyLength; i++) { if (currentBytes[currentStart + i] != keyBytes[keyStart + i]) { - // LOG.info("VectorMapJoinFastKeyStore equalKey no match on bytes"); + // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes"); return false; } } - // LOG.info("VectorMapJoinFastKeyStore equalKey match on bytes"); + // LOG.debug("VectorMapJoinFastKeyStore equalKey match on bytes"); return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index 3a0b380..76ecf3a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -68,7 +68,7 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMapResult.forget(); long hashCode = VectorMapJoinFastLongHashUtil.hashKey(key); - // LOG.info("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode); + // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode); long valueRef = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; if (valueRef == -1) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index b448e1f..c35c0c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -121,13 +121,13 @@ public void add(long key, BytesWritable currentValue) { int pairIndex = 2 * slot; long valueRef = slotPairs[pairIndex]; if (valueRef == 0) { - // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")"); + // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")"); isNewKey = true; break; } long tableKey = slotPairs[pairIndex + 1]; if (key == tableKey) { - // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")"); + // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")"); isNewKey = false; break; } @@ -145,7 +145,7 @@ public void add(long key, BytesWritable currentValue) { // debugDumpKeyProbe(keyOffset, keyLength, hashCode, slot); } - // LOG.info("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode)); + // LOG.debug("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode)); assignSlot(slot, key, isNewKey, currentValue); @@ -206,7 +206,7 @@ private void expandAndRehash() { } // Use old value reference word. 
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")"); + // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")"); newSlotPairs[newPairIndex] = valueRef; newSlotPairs[newPairIndex + 1] = tableKey; @@ -220,7 +220,7 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); + // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } protected long findReadSlot(long key, long hashCode) { @@ -235,20 +235,20 @@ protected long findReadSlot(long key, long hashCode) { long valueRef = slotPairs[pairIndex]; if (valueRef == 0) { // Given that we do not delete, an empty slot means no match. - // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")"); + // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")"); return -1; } long tableKey = slotPairs[pairIndex + 1]; if (key == tableKey) { - // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")"); + // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")"); return slotPairs[pairIndex]; } // Some other key (collision) - keep probing. probeSlot += (++i); if (i > largestNumberOfSteps) { - // LOG.info("VectorMapJoinFastLongHashTable findReadSlot returning not found"); + // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found"); // We know we never went that far when we were inserting. 
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")"); + // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")"); return -1; } slot = (int)(probeSlot & logicalHashBucketMask); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 3789275..872ffda 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -75,16 +75,16 @@ public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf, this.keyCount = keyCount; this.memUsage = memUsage; - // LOG.info("VectorMapJoinFastTableContainer load keyCountAdj " + keyCountAdj); - // LOG.info("VectorMapJoinFastTableContainer load threshold " + threshold); - // LOG.info("VectorMapJoinFastTableContainer load loadFactor " + loadFactor); - // LOG.info("VectorMapJoinFastTableContainer load wbSize " + wbSize); - // LOG.info("VectorMapJoinFastTableContainer load memUsage " + memUsage); + // LOG.debug("VectorMapJoinFastTableContainer load keyCountAdj " + keyCountAdj); + // LOG.debug("VectorMapJoinFastTableContainer load threshold " + threshold); + // LOG.debug("VectorMapJoinFastTableContainer load loadFactor " + loadFactor); + // LOG.debug("VectorMapJoinFastTableContainer load wbSize " + wbSize); + // LOG.debug("VectorMapJoinFastTableContainer load memUsage " + memUsage); int newThreshold = HashMapWrapper.calculateTableSize( keyCountAdj, threshold, loadFactor, keyCount); - // LOG.info("VectorMapJoinFastTableContainer load newThreshold " + newThreshold); + // LOG.debug("VectorMapJoinFastTableContainer load newThreshold " + newThreshold); VectorMapJoinFastHashTable = createHashTable(newThreshold); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java index caa705c..6491dc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java @@ -142,7 +142,7 @@ public HashMapResult() { } public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) { - // LOG.info("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord)); + // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord)); this.valueStore = valueStore; this.valueRefWord = valueRefWord; @@ -473,7 +473,7 @@ public long addFirst(byte[] valueBytes, int valueStart, int valueLength) { valueRefWord |= SmallValueLength.allBitsOnBitShifted; } - // LOG.info("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord)); + // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord)); // The lower bits are the absolute value offset. 
valueRefWord |= newAbsoluteOffset; @@ -499,7 +499,7 @@ public long addMore(long oldValueRef, byte[] valueBytes, int valueStart, int val boolean isOldValueLast = ((oldValueRef & IsLastFlag.flagOnMask) != 0); - // LOG.info("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef)); + // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef)); /* * Write information about the old value (which becomes our next) at the beginning @@ -546,7 +546,7 @@ public long addMore(long oldValueRef, byte[] valueBytes, int valueStart, int val // The lower bits are the absolute value offset. newValueRef |= newAbsoluteOffset; - // LOG.info("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef)); + // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef)); return newValueRef; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java index 60825ce..dc65eaa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java @@ -113,7 +113,7 @@ public void adaptPutRow(VectorMapJoinOptimizedHashTable hashTable, } // byte[] bytes = Arrays.copyOf(currentKey.get(), currentKey.getLength()); - // LOG.info("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes)); + // LOG.debug("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes)); } @@ -145,7 +145,7 @@ public SerializedBytes serialize(long key) throws IOException { } // byte[] bytes = Arrays.copyOf(output.getData(), output.getLength()); - // LOG.info("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes)); + // LOG.debug("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes)); serializedBytes.bytes = output.getData(); serializedBytes.offset = 0; diff --git ql/src/test/queries/clientpositive/vector_join30.q ql/src/test/queries/clientpositive/vector_join30.q new file mode 100644 index 0000000..aa51f1f --- /dev/null +++ ql/src/test/queries/clientpositive/vector_join30.q @@ -0,0 +1,158 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.fetch.task.conversion=none; +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src; + +explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by 
key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)); + +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)); + +explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)); + +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)); + +explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)); + +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)); + +explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); + +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)); diff --git 
ql/src/test/queries/clientpositive/vector_join_filters.q ql/src/test/queries/clientpositive/vector_join_filters.q new file mode 100644 index 0000000..bd2d13c --- /dev/null +++ ql/src/test/queries/clientpositive/vector_join_filters.q @@ -0,0 +1,36 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.fetch.task.conversion=none; +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +CREATE TABLE myinput1_txt(key int, value int); +LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt; +CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt; + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = 
b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); +SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); +SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_join_nulls.q ql/src/test/queries/clientpositive/vector_join_nulls.q new file mode 100644 index 0000000..8286c62 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_join_nulls.q @@ -0,0 +1,31 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.fetch.task.conversion=none; +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +CREATE TABLE myinput1_txt(key int, value int); +LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt; +CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt; + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN 
myinput1 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value; + +SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value); +SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value); +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value; + diff --git ql/src/test/results/clientpositive/tez/vector_join30.q.out ql/src/test/results/clientpositive/tez/vector_join30.q.out new file mode 100644 index 0000000..4abb68a --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_join30.q.out @@ -0,0 +1,1363 @@ +PREHOOK: query: CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@orcsrc +POSTHOOK: query: CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcsrc +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: 
Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern 
was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: 
string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null 
(type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Reducer 5 + 2 Reducer 7 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) 
+ sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +348019368476 +PREHOOK: 
query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 
(SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), 
KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + + 
  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
diff --git ql/src/test/results/clientpositive/tez/vector_join_filters.q.out ql/src/test/results/clientpositive/tez/vector_join_filters.q.out
new file mode 100644
index 0000000..38a6b24
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_join_filters.q.out
@@ -0,0 +1,218 @@
+PREHOOK: query: CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+Warning: Map Join MAPJOIN[15][bigTable=a] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+Warning: Map Join MAPJOIN[15][bigTable=b] in task 'Map 2' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
diff --git ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
new file mode 100644
index 0000000..ae2106c
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
@@ -0,0 +1,191 @@
+PREHOOK: query: CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509856
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542038
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543491
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509891
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
diff --git ql/src/test/results/clientpositive/vector_join30.q.out ql/src/test/results/clientpositive/vector_join30.q.out
new file mode 100644
index 0000000..1970fa4
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_join30.q.out
@@ -0,0 +1,2190 @@
+PREHOOK: query: CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcsrc
+POSTHOOK: query: CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcsrc
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2
+  Stage-8 has a backup stage: Stage-2
+  Stage-5 depends on stages: Stage-8
+  Stage-3 depends on stages: Stage-2, Stage-5, Stage-6
+  Stage-9 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-9
+  Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage:
Stage-7 + Conditional Operator + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2 + Stage-7 has a backup stage: Stage-2 + Stage-5 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-2, Stage-5 + Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2 + Stage-7 has a backup stage: Stage-2 + Stage-5 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-2, Stage-5 + Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: 
+ Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-9 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-10, Stage-11, Stage-12, Stage-2 + Stage-10 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-10 + Stage-3 depends on stages: Stage-2, Stage-6, Stage-7, Stage-8 + Stage-11 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-11 + Stage-12 has a backup stage: Stage-2 + Stage-8 depends on stages: Stage-12 + Stage-2 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Conditional Operator + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + $INTNAME2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $INTNAME2 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator 
+ condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + $INTNAME2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $INTNAME2 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: 
Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + 
expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-8 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-9, Stage-10, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 + Stage-2 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + $INTNAME2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 
2 _col0 (type: string) + $INTNAME2 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + $INTNAME2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $INTNAME2 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + 
Left Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc 
+#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 + Stage-8 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + $INTNAME2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $INTNAME2 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 + Stage-8 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + 
HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcsrc +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2 + Stage-8 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-0 depends on 
stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: 
_col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@orcsrc +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT 
orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
diff --git ql/src/test/results/clientpositive/vector_join_filters.q.out ql/src/test/results/clientpositive/vector_join_filters.q.out
new file mode 100644
index 0000000..9fda34e
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_join_filters.q.out
@@ -0,0 +1,218 @@
+PREHOOK: query: CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+Warning: Map Join MAPJOIN[17][bigTable=a] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+Warning: Map Join MAPJOIN[17][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
diff --git ql/src/test/results/clientpositive/vector_join_nulls.q.out ql/src/test/results/clientpositive/vector_join_nulls.q.out
new file mode 100644
index 0000000..673cbfa
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_join_nulls.q.out
@@ -0,0 +1,191 @@
+PREHOOK: query: CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509856
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### 
A masked pattern was here #### +4542038 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543491 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4542003 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3079923 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4509891 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3113558 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3079923 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3112070 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY 
+PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3113558 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3112070