diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java index 385f6fe..329b73b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java @@ -96,9 +96,10 @@ public abstract class AbstractMapJoinOperator extends Co posBigTable = conf.getPosBigTable(); emptyList = new RowContainer<ArrayList<Object>>(1, hconf); + RowContainer bigPosRC = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors.get((byte) posBigTable), - order[posBigTable], joinCacheSize,spillTableDesc, conf,noOuterJoin); + order[posBigTable], joinCacheSize, spillTableDesc, conf, !hasFilter(posBigTable)); storage.put((byte) posBigTable, bigPosRC); mapJoinRowsKey = HiveConf.getIntVar(hconf, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java index 9e84e85..f1481a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java @@ -36,13 +36,12 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.BooleanWritable; /** * Join operator implementation. @@ -98,6 +97,8 @@ public abstract class CommonJoinOperator extends */ protected transient Map<Byte, List<ExprNodeEvaluator>> joinFilters; + protected transient int[][] filterMap; + /** * The ObjectInspectors for the join inputs.
*/ @@ -260,6 +261,8 @@ public abstract class CommonJoinOperator extends joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors( joinValuesObjectInspectors,NOTSKIPBIGTABLE); + filterMap = conf.getFilterMap(); + if (noOuterJoin) { rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors; } else { @@ -270,7 +273,7 @@ public abstract class CommonJoinOperator extends rcOIs.addAll(joinValuesObjectInspectors.get(alias)); // for each alias, add object inspector for boolean as the last element rcOIs.add( - PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); + PrimitiveObjectInspectorFactory.writableByteObjectInspector); rowContainerObjectInspectors.put(alias, rcOIs); } rowContainerStandardObjectInspectors = @@ -304,13 +307,13 @@ public abstract class CommonJoinOperator extends // add whether the row is filtered or not // this value does not matter for the dummyObj // because the join values are already null - nr.add(new BooleanWritable(false)); + nr.add(new ByteWritable()); } dummyObj[pos] = nr; // there should be only 1 dummy object in the RowContainer RowContainer> values = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors.get((byte)pos), - alias, 1, spillTableDesc, conf, noOuterJoin); + alias, 1, spillTableDesc, conf, !hasFilter(pos)); values.add((ArrayList) dummyObj[pos]); dummyObjVectors[pos] = values; @@ -319,7 +322,7 @@ public abstract class CommonJoinOperator extends // e.g., the output columns does not contains the input table RowContainer rc = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors.get((byte)pos), - alias, joinCacheSize,spillTableDesc, conf,noOuterJoin); + alias, joinCacheSize,spillTableDesc, conf, !hasFilter(pos)); storage.put(pos, rc); pos++; @@ -443,14 +446,11 @@ transient boolean newGroupStarted = false; private ArrayList joinObjectsLeftOuterJoin( ArrayList resNulls, ArrayList inputNulls, - ArrayList newObj, IntermediateObject intObj, int left, + ArrayList newObj, IntermediateObject intObj, int left, int right, boolean newObjNull) { // newObj is null if is already null or // if the row corresponding to the left alias does not pass through filter - int filterIndex = joinValues.get(order[left]).size(); - if(filterIndex < intObj.getObjs()[left].size()) { - newObjNull = newObjNull || ((BooleanWritable) (intObj.getObjs()[left].get(filterIndex))).get(); - } + newObjNull |= isLeftFiltered(left, right, intObj.getObjs()[left]); Iterator nullsIter = inputNulls.iterator(); while (nullsIter.hasNext()) { @@ -470,7 +470,7 @@ transient boolean newGroupStarted = false; private ArrayList joinObjectsRightOuterJoin( ArrayList resNulls, ArrayList inputNulls, - ArrayList newObj, IntermediateObject intObj, int left, + ArrayList newObj, IntermediateObject intObj, int left, int right, boolean newObjNull, boolean firstRow) { if (newObjNull) { return resNulls; @@ -498,7 +498,7 @@ transient boolean newGroupStarted = false; } // if the row does not pass through filter, all old Objects are null - if (((BooleanWritable)newObj.get(newObj.size()-1)).get()) { + if (isRightFiltered(left, right, newObj)) { allOldObjsNull = true; } nullsIter = inputNulls.iterator(); @@ -526,7 +526,7 @@ transient boolean newGroupStarted = false; private ArrayList joinObjectsFullOuterJoin( ArrayList resNulls, ArrayList inputNulls, - ArrayList newObj, IntermediateObject intObj, int left, + ArrayList newObj, IntermediateObject intObj, int left, int right, boolean newObjNull, boolean firstRow) { if (newObjNull) { Iterator nullsIter = 
inputNulls.iterator(); @@ -562,7 +562,7 @@ transient boolean newGroupStarted = false; } // if the row does not pass through filter, all old Objects are null - if (((BooleanWritable)newObj.get(newObj.size()-1)).get()) { + if (isRightFiltered(left, right, newObj)) { allOldObjsNull = true; } boolean rhsPreserved = false; @@ -572,9 +572,8 @@ transient boolean newGroupStarted = false; boolean[] oldNulls = nullsIter.next(); // old obj is null even if the row corresponding to the left alias // does not pass through filter - boolean oldObjNull = oldNulls[left] || ((BooleanWritable) - (intObj.getObjs()[left].get(joinValues.get(order[left]).size()))).get() - || allOldObjsNull; + boolean oldObjNull = oldNulls[left] || allOldObjsNull + || isLeftFiltered(left, right, intObj.getObjs()[left]); if (!oldObjNull) { boolean[] newNulls = new boolean[intObj.getCurSize()]; copyOldArray(oldNulls, newNulls); @@ -623,6 +622,7 @@ transient boolean newGroupStarted = false; } int left = condn[joinPos - 1].getLeft(); + int right = condn[joinPos - 1].getRight(); int type = condn[joinPos - 1].getType(); // process all nulls for RIGHT and FULL OUTER JOINS @@ -646,17 +646,17 @@ transient boolean newGroupStarted = false; newObjNull); } else if (type == JoinDesc.LEFT_OUTER_JOIN) { return joinObjectsLeftOuterJoin(resNulls, inputNulls, newObj, intObj, - left, newObjNull); + left, right, newObjNull); } else if (type == JoinDesc.RIGHT_OUTER_JOIN) { return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj, - left, newObjNull, firstRow); + left, right, newObjNull, firstRow); } else if (type == JoinDesc.LEFT_SEMI_JOIN) { return joinObjectsLeftSemiJoin(resNulls, inputNulls, newObj, intObj, left, newObjNull); } assert (type == JoinDesc.FULL_OUTER_JOIN); - return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left, + return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left, right, newObjNull, firstRow); } @@ -821,19 +821,14 @@ transient boolean newGroupStarted = false; hasEmpty = true; alw.add((ArrayList) dummyObj[i]); } else if (!hasEmpty && alw.size() == 1) { - ArrayList row = alw.first(); - int numValues = joinValues.get(alias).size(); - if (row == dummyObj[alias] - || (row.size() > numValues && ((BooleanWritable) (row.get(numValues))).get())) { + if (hasAnyFiltered(alias, alw.first())) { hasEmpty = true; } } else { mayHasMoreThanOne = true; if (!hasEmpty) { - int numValues = joinValues.get(alias).size(); for (ArrayList row = alw.first(); row != null; row = alw.next()) { - if (row == dummyObj[alias] - || (row.size() > numValues && ((BooleanWritable) (row.get(numValues))).get())) { + if (hasAnyFiltered(alias, row)) { hasEmpty = true; break; } @@ -860,6 +855,31 @@ transient boolean newGroupStarted = false; } } + private boolean isLeftFiltered(int left, int right, ArrayList leftObj) { + if (joinValues.get(order[left]).size() < leftObj.size()) { + ByteWritable filter = (ByteWritable) leftObj.get(leftObj.size() - 1); + return JoinUtil.isFiltered(filter.get(), right); + } + return false; + } + + private boolean isRightFiltered(int left, int right, ArrayList rightObj) { + if (joinValues.get(order[right]).size() < rightObj.size()) { + ByteWritable filter = (ByteWritable) rightObj.get(rightObj.size() - 1); + return JoinUtil.isFiltered(filter.get(), left); + } + return false; + } + + private boolean hasAnyFiltered(int alias, ArrayList row) { + return row == dummyObj[alias] || + hasFilter(alias) && JoinUtil.hasAnyFiltered(((ByteWritable) row.get(row.size() - 1)).get()); + } + + protected final 
boolean hasFilter(int alias) { + return filterMap != null && filterMap[alias] != null; + } + protected void reportProgress() { // Send some status periodically countAfterReport++; @@ -872,25 +892,6 @@ transient boolean newGroupStarted = false; } /** - * Returns true if the row does not pass through filters. - */ - protected static Boolean isFiltered(Object row, - List filters, List ois) - throws HiveException { - // apply join filters on the row. - Boolean ret = false; - for (int j = 0; j < filters.size(); j++) { - Object condition = filters.get(j).evaluate(row); - ret = (Boolean) ((PrimitiveObjectInspector) - ois.get(j)).getPrimitiveJavaObject(condition); - if (ret == null || !ret) { - return true; - } - } - return false; - } - - /** * All done. * */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java index 13884cd..7e6377a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java @@ -48,7 +48,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.util.ReflectionUtils; @@ -86,6 +85,8 @@ public class HashTableSinkOperator extends TerminalOperator i */ protected transient Map> joinFilters; + protected transient int[][] filterMap; + protected transient int numAliases; // number of aliases /** * The expressions for join outputs. @@ -197,6 +198,7 @@ public class HashTableSinkOperator extends TerminalOperator i totalSz = 0; noOuterJoin = conf.isNoOuterJoin(); + filterMap = conf.getFilterMap(); // process join keys joinKeys = new HashMap>(); @@ -228,10 +230,12 @@ public class HashTableSinkOperator extends TerminalOperator i if (alias == posBigTableAlias) { continue; } - ArrayList rcOIs = new ArrayList(); - rcOIs.addAll(joinValuesObjectInspectors.get(alias)); - // for each alias, add object inspector for boolean as the last element - rcOIs.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); + List rcOIs = joinValuesObjectInspectors.get(alias); + if (filterMap != null && filterMap[alias] != null) { + // for each alias, add object inspector for filter tag as the last element + rcOIs = new ArrayList(rcOIs); + rcOIs.add(PrimitiveObjectInspectorFactory.writableByteObjectInspector); + } rowContainerObjectInspectors.put(alias, rcOIs); } rowContainerStandardObjectInspectors = getStandardObjectInspectors(rowContainerObjectInspectors); @@ -318,7 +322,7 @@ public class HashTableSinkOperator extends TerminalOperator i Object[] value = JoinUtil.computeMapJoinValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors - .get(alias), noOuterJoin); + .get(alias), filterMap == null ? 
null : filterMap[alias]); HashMapWrapper hashTable = mapJoinTables diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java index e3ed13a..992df5e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java @@ -83,7 +83,8 @@ public class JoinOperator extends CommonJoinOperator implements ArrayList nr = JoinUtil.computeValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), - joinFilterObjectInspectors.get(alias), noOuterJoin); + joinFilterObjectInspectors.get(alias), + filterMap == null ? null : filterMap[alias]); if (handleSkewJoin) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java index 65a4c18..62e4326 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java @@ -45,7 +45,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.util.ReflectionUtils; @@ -207,14 +207,14 @@ public class JoinUtil { public static Object[] computeMapJoinValues(Object row, List valueFields, List valueFieldsOI, List filters, List filtersOI, - boolean noOuterJoin) throws HiveException { + int[] filterMap) throws HiveException { // Compute the keys Object[] nr; - if (!noOuterJoin) { + if (filterMap != null) { nr = new Object[valueFields.size()+1]; // add whether the row is filtered or not. - nr[valueFields.size()] = new BooleanWritable(isFiltered(row, filters, filtersOI)); + nr[valueFields.size()] = new ByteWritable(isFiltered(row, filters, filtersOI, filterMap)); }else{ nr = new Object[valueFields.size()]; } @@ -235,7 +235,7 @@ public class JoinUtil { public static ArrayList computeValues(Object row, List valueFields, List valueFieldsOI, List filters, List filtersOI, - boolean noOuterJoin) throws HiveException { + int[] filterMap) throws HiveException { // Compute the values ArrayList nr = new ArrayList(valueFields.size()); @@ -244,54 +244,77 @@ public class JoinUtil { .evaluate(row), valueFieldsOI.get(i), ObjectInspectorCopyOption.WRITABLE)); } - if (!noOuterJoin) { + if (filterMap != null) { // add whether the row is filtered or not. - nr.add(new BooleanWritable(isFiltered(row, filters, filtersOI))); + nr.add(new ByteWritable(isFiltered(row, filters, filtersOI, filterMap))); } return nr; } + + private static final byte[] MASKS = new byte[] + {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (byte) 0x80}; + /** * Returns true if the row does not pass through filters. */ - protected static Boolean isFiltered(Object row, - List filters, List ois) - throws HiveException { + protected static byte isFiltered(Object row, List filters, + List ois, int[] filterMap) throws HiveException { // apply join filters on the row. 
- Boolean ret = false; - for (int j = 0; j < filters.size(); j++) { - Object condition = filters.get(j).evaluate(row); - ret = (Boolean) ((PrimitiveObjectInspector) - ois.get(j)).getPrimitiveJavaObject(condition); - if (ret == null || !ret) { - return true; + byte ret = 0; + int j = 0; + for (int i = 0; i < filterMap.length; i += 2) { + int tag = filterMap[i]; + int length = filterMap[i + 1]; + + boolean passed = true; + for (; length > 0; length--, j++) { + if (passed) { + Object condition = filters.get(j).evaluate(row); + Boolean result = (Boolean) ((PrimitiveObjectInspector) + ois.get(j)).getPrimitiveJavaObject(condition); + if (result == null || !result) { + passed = false; + } + } + } + if (!passed) { + ret |= MASKS[tag]; } } - return false; + return ret; + } + + protected static boolean isFiltered(byte filter, int tag) { + return (filter & MASKS[tag]) != 0; + } + + protected static boolean hasAnyFiltered(byte tag) { + return tag != 0; } public static TableDesc getSpillTableDesc(Byte alias, Map spillTableDesc,JoinDesc conf, - boolean noOuterJoin) { + boolean noFilter) { if (spillTableDesc == null || spillTableDesc.size() == 0) { - spillTableDesc = initSpillTables(conf,noOuterJoin); + spillTableDesc = initSpillTables(conf,noFilter); } return spillTableDesc.get(alias); } public static Map getSpillTableDesc( Map spillTableDesc,JoinDesc conf, - boolean noOuterJoin) { + boolean noFilter) { if (spillTableDesc == null) { - spillTableDesc = initSpillTables(conf,noOuterJoin); + spillTableDesc = initSpillTables(conf,noFilter); } return spillTableDesc; } public static SerDe getSpillSerDe(byte alias, Map spillTableDesc,JoinDesc conf, - boolean noOuterJoin) { - TableDesc desc = getSpillTableDesc(alias, spillTableDesc, conf, noOuterJoin); + boolean noFilter) { + TableDesc desc = getSpillTableDesc(alias, spillTableDesc, conf, noFilter); if (desc == null) { return null; } @@ -306,7 +329,7 @@ public class JoinUtil { return sd; } - public static Map initSpillTables(JoinDesc conf,boolean noOuterJoin) { + public static Map initSpillTables(JoinDesc conf, boolean noFilter) { Map> exprs = conf.getExprs(); Map spillTableDesc = new HashMap(exprs.size()); for (int tag = 0; tag < exprs.size(); tag++) { @@ -325,10 +348,10 @@ public class JoinUtil { colTypes.append(valueCols.get(k).getTypeString()); colTypes.append(','); } - if (!noOuterJoin) { + if (!noFilter) { colNames.append("filtered"); colNames.append(','); - colTypes.append(TypeInfoFactory.booleanTypeInfo.getTypeName()); + colTypes.append(TypeInfoFactory.byteTypeInfo.getTypeName()); colTypes.append(','); } // remove the last ',' @@ -352,11 +375,10 @@ public class JoinUtil { public static RowContainer getRowContainer(Configuration hconf, List structFieldObjectInspectors, Byte alias,int containerSize, Map spillTableDesc, - JoinDesc conf,boolean noOuterJoin) throws HiveException { + JoinDesc conf,boolean noFilter) throws HiveException { - TableDesc tblDesc = JoinUtil.getSpillTableDesc(alias,spillTableDesc,conf, noOuterJoin); - SerDe serde = JoinUtil.getSpillSerDe(alias, spillTableDesc, conf, - noOuterJoin); + TableDesc tblDesc = JoinUtil.getSpillTableDesc(alias,spillTableDesc,conf, noFilter); + SerDe serde = JoinUtil.getSpillSerDe(alias, spillTableDesc, conf, noFilter); if (serde == null) { containerSize = -1; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 755c72b..1cb4df9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -238,7 +238,7 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem joinKeysObjectInspectors.get(alias)); ArrayList value = JoinUtil.computeValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors - .get(alias), noOuterJoin); + .get(alias), filterMap == null ? null : filterMap[alias]); // Add the value to the ArrayList diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java index 5fe38de..91f8ce0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java @@ -110,11 +110,11 @@ public class SMBMapJoinOperator extends AbstractMapJoinOperator imp for (Byte alias : order) { RowContainer rc = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors.get(storePos), - alias, bucketSize,spillTableDesc, conf,noOuterJoin); + alias, bucketSize,spillTableDesc, conf, !hasFilter(storePos)); nextGroupStorage[storePos] = rc; RowContainer candidateRC = JoinUtil.getRowContainer(hconf, rowContainerStandardObjectInspectors.get((byte)storePos), - alias,bucketSize,spillTableDesc, conf,noOuterJoin); + alias,bucketSize,spillTableDesc, conf, !hasFilter(storePos)); candidateStorage[alias] = candidateRC; storePos++; } @@ -224,7 +224,8 @@ public class SMBMapJoinOperator extends AbstractMapJoinOperator imp joinKeysObjectInspectors.get(alias)); ArrayList value = JoinUtil.computeValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), - joinFilterObjectInspectors.get(alias), noOuterJoin); + joinFilterObjectInspectors.get(alias), + filterMap == null ? null : filterMap[alias]); //have we reached a new key group? 
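The core mechanism of this patch: instead of the single BooleanWritable "filtered" flag that the old noOuterJoin-driven code appended to each spilled row, every outer-joined row now carries one ByteWritable whose bit i records whether the row failed the filters aimed at join position i, so a single alias can participate in several outer joins with independent filter predicates. Below is a minimal, self-contained sketch of that encoding; the MASKS constant mirrors JoinUtil above, but the class and the helper names encodeFilterTag/failsFilterFor are illustrative, not part of the patch:

  public class FilterTagDemo {
    // same bit masks as JoinUtil.MASKS in the patch
    private static final byte[] MASKS = new byte[] {
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (byte) 0x80};

    // filterMap for one alias is a flat array of <target tag, filter count> pairs;
    // results[j] is the outcome of the j-th filter predicate in that order
    static byte encodeFilterTag(int[] filterMap, boolean[] results) {
      byte tag = 0;
      int j = 0;
      for (int i = 0; i < filterMap.length; i += 2) {
        boolean passed = true;
        for (int length = filterMap[i + 1]; length > 0; length--, j++) {
          passed &= results[j];
        }
        if (!passed) {
          tag |= MASKS[filterMap[i]]; // row is "filtered" w.r.t. that join target
        }
      }
      return tag;
    }

    // JoinUtil.isFiltered(byte, int) in the patch performs exactly this test
    static boolean failsFilterFor(byte tag, int target) {
      return (tag & MASKS[target]) != 0;
    }

    public static void main(String[] args) {
      // b in: a right outer join b (two filters on b) left outer join c (two more),
      // i.e. filterMap[1] = [0, 2, 2, 2] as in the "filter mappings" plans below.
      // Suppose a b-row passes both filters toward a but fails one toward c:
      byte tag = encodeFilterTag(new int[] {0, 2, 2, 2},
          new boolean[] {true, true, true, false});
      System.out.println(failsFilterFor(tag, 0)); // false: b joins a normally
      System.out.println(failsFilterFor(tag, 2)); // true: b acts as null toward c
    }
  }

Because the tag travels with the spilled row, CommonJoinOperator.isLeftFiltered/isRightFiltered can test just the bit relevant to the join side being produced, instead of nulling the row for every outer join it touches.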
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java index cf3baa6..d861d19 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java @@ -116,6 +116,7 @@ public class SkewJoinHandler { bigKeysExistingMap = new HashMap(numAliases); taskId = Utilities.getTaskId(hconf); + int[][] filterMap = desc.getFilterMap(); for (int i = 0; i < numAliases; i++) { Byte alias = conf.getTagOrder()[i]; List skewTableKeyInspectors = new ArrayList(); @@ -145,7 +146,9 @@ public class SkewJoinHandler { break; } - TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias,joinOp.spillTableDesc,conf,noOuterJoin); + boolean hasFilter = filterMap != null && filterMap[i] != null; + TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias, + joinOp.spillTableDesc, conf, !hasFilter); List valColNames = new ArrayList(); if (valTblDesc != null) { valColNames = Utilities.getColumnNames(valTblDesc.getProperties()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index 43eef07..2b40342 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -357,8 +357,8 @@ public class MapJoinProcessor implements Transform { valueExprMap.put(Byte.valueOf((byte) pos), values); } - Map> filterMap = desc.getFilters(); - for (Map.Entry> entry : filterMap.entrySet()) { + Map> filters = desc.getFilters(); + for (Map.Entry> entry : filters.entrySet()) { Byte srcAlias = entry.getKey(); List columnDescList = entry.getValue(); @@ -405,6 +405,7 @@ public class MapJoinProcessor implements Transform { List valueTableDescs = new ArrayList(); List valueFiltedTableDescs = new ArrayList(); + int[][] filterMap = desc.getFilterMap(); for (pos = 0; pos < newParentOps.size(); pos++) { List valueCols = valueExprMap.get(Byte.valueOf((byte) pos)); int length = valueCols.size(); @@ -413,11 +414,9 @@ public class MapJoinProcessor implements Transform { for (int i = 0; i < length; i++) { valueFilteredCols.add(valueCols.get(i).clone()); } - List valueFilters = filterMap.get(Byte.valueOf((byte) pos)); - - if (valueFilters != null && valueFilters.size() != 0 && pos != mapJoinPos) { + if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) { ExprNodeColumnDesc isFilterDesc = new ExprNodeColumnDesc(TypeInfoFactory - .getPrimitiveTypeInfo(Constants.BOOLEAN_TYPE_NAME), "filter", "filter", false); + .getPrimitiveTypeInfo(Constants.TINYINT_TYPE_NAME), "filter", "filter", false); valueFilteredCols.add(isFilterDesc); } @@ -446,9 +445,10 @@ public class MapJoinProcessor implements Transform { } MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, valueExprMap, valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns, - filterMap, op.getConf().getNoOuterJoin(), dumpFilePrefix); + filters, op.getConf().getNoOuterJoin(), dumpFilePrefix); mapJoinDescriptor.setTagOrder(tagOrder); mapJoinDescriptor.setNullSafes(desc.getNullSafes()); + mapJoinDescriptor.setFilterMap(desc.getFilterMap()); MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild( mapJoinDescriptor, new RowSchema(outputRS.getColumnInfos()), newPar); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java 
index 6ea7d5c..d6c3549 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java @@ -53,6 +53,9 @@ public class QBJoinTree implements Serializable{ // filters private ArrayList> filters; + // outer-pos : xn + private int[][] filterMap; + // filters for pushing private ArrayList> filtersForPushing; @@ -305,4 +308,16 @@ public class QBJoinTree implements Serializable{ public void setNullSafes(ArrayList nullSafes) { this.nullsafes = nullSafes; } + + public void addFilterMapping(int outer, int target, int length) { + filterMap[outer] = new int[] { target, length }; + } + + public int[][] getFilterMap() { + return filterMap; + } + + public void setFilterMap(int[][] filterMap) { + this.filterMap = filterMap; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d5e03b1..1d21193 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1259,6 +1259,24 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } + private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, + ArrayList leftSrc) throws SemanticException { + if (joinCond == null) { + return; + } + JoinCond cond = joinTree.getJoinCond()[0]; + + JoinType type = cond.getJoinType(); + parseJoinCondition(joinTree, joinCond, leftSrc, type); + + List> filters = joinTree.getFilters(); + if (type == JoinType.LEFTOUTER || type == JoinType.FULLOUTER) { + joinTree.addFilterMapping(cond.getLeft(), cond.getRight(), filters.get(0).size()); + } + if (type == JoinType.RIGHTOUTER || type == JoinType.FULLOUTER) { + joinTree.addFilterMapping(cond.getRight(), cond.getLeft(), filters.get(1).size()); + } + } /** * Parse the join condition. If the condition is a join condition, throw an * error if it is not an equality. 
Otherwise, break it into left and right @@ -1279,20 +1297,19 @@ * @throws SemanticException */ private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, - ArrayList<String> leftSrc) throws SemanticException { + ArrayList<String> leftSrc, JoinType type) throws SemanticException { if (joinCond == null) { return; } - JoinType type = joinTree.getJoinCond()[0].getJoinType(); switch (joinCond.getToken().getType()) { case HiveParser.KW_OR: throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3 .getMsg(joinCond)); case HiveParser.KW_AND: - parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc); - parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc); + parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc, type); + parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc, type); break; case HiveParser.EQUAL_NS: @@ -4937,6 +4954,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, join.getNoOuterJoin(), joinCondns, filterMap); desc.setReversedExprs(reversedExprs); + desc.setFilterMap(compactFilter(join.getFilterMap())); + JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc, new RowSchema(outputRS.getColumnInfos()), rightOps); joinOp.setColumnExprMap(colExprMap); @@ -4952,6 +4971,34 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { return putOpInsertMap(joinOp, outputRS); } + // remove the filter mapping for an outer alias if no filter exists on it + private int[][] compactFilter(int[][] filterMap) { + if (filterMap == null) { + return null; + } + for (int i = 0; i < filterMap.length; i++) { + if (filterMap[i] != null) { + boolean noFilter = true; + // join positions for even index, filter lengths for odd index + for (int j = 1; j < filterMap[i].length; j += 2) { + if (filterMap[i][j] > 0) { + noFilter = false; + break; + } + } + if (noFilter) { + filterMap[i] = null; + } + } + } + for (int[] mapping : filterMap) { + if (mapping != null) { + return filterMap; + } + } + return null; + } + @SuppressWarnings("nls") private Operator genJoinReduceSinkChild(QB qb, QBJoinTree joinTree, Operator child, String srcName, int pos) throws SemanticException { @@ -5461,6 +5508,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { filters.add(new ArrayList<ASTNode>()); filters.add(new ArrayList<ASTNode>()); joinTree.setFilters(filters); + joinTree.setFilterMap(new int[2][]); ArrayList<ArrayList<ASTNode>> filtersForPushing = new ArrayList<ArrayList<ASTNode>>(); @@ -5578,6 +5626,31 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { filterPos.addAll(node.getFilters().get(0)); } + int[][] nmap = node.getFilterMap(); + int[][] tmap = target.getFilterMap(); + int[][] newmap = new int[tmap.length + nmap.length - 1][]; + + for (int[] mapping : nmap) { + if (mapping != null) { + for (int i = 0; i < mapping.length; i += 2) { + mapping[i] += trgtRightAliases.length; + } + } + } + if (nmap[0] != null) { + if (tmap[pos] == null) { + tmap[pos] = nmap[0]; + } else { + int[] appended = new int[tmap[pos].length + nmap[0].length]; + System.arraycopy(tmap[pos], 0, appended, 0, tmap[pos].length); + System.arraycopy(nmap[0], 0, appended, tmap[pos].length, nmap[0].length); + tmap[pos] = appended; + } + } + System.arraycopy(tmap, 0, newmap, 0, tmap.length); + System.arraycopy(nmap, 1, newmap, tmap.length, nmap.length - 1); + target.setFilterMap(newmap); + ArrayList<ArrayList<ASTNode>> filter = target.getFiltersForPushing(); for (int i = 0; i < nodeRightAliases.length;
i++) { filter.add(node.getFiltersForPushing().get(i + 1)); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java index d8e233f..9331f30 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java @@ -20,8 +20,10 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -49,6 +51,9 @@ public class HashTableSinkDesc extends JoinDesc implements Serializable { // alias to filter mapping private Map<Byte, List<ExprNodeDesc>> filters; + // filter mapping of each outer-joined alias (see JoinDesc#filterMap) + private int[][] filterMap; + // used for create joinOutputObjectInspector protected List<String> outputColumnNames; @@ -98,6 +103,7 @@ public class HashTableSinkDesc extends JoinDesc implements Serializable { this.smallKeysDirMap = clone.getSmallKeysDirMap(); this.tagOrder = clone.getTagOrder(); this.filters = clone.getFilters(); + this.filterMap = clone.getFilterMap(); this.keys = clone.getKeys(); this.keyTblDesc = clone.getKeyTblDesc(); @@ -285,6 +291,21 @@ public class HashTableSinkDesc extends JoinDesc implements Serializable { this.keyTableDesc = keyTableDesc; } + @Override + public int[][] getFilterMap() { + return filterMap; + } + + @Override + public void setFilterMap(int[][] filterMap) { + this.filterMap = filterMap; + } + + @Override + @Explain(displayName = "filter mappings", normalExplain = false) + public Map<Integer, String> getFilterMappingMap() { + return getFilterMappingMap(filterMap); + } public Map<Byte, List<Integer>> getRetainList() { return retainList; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index 81ee699..dcf2190 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -52,6 +52,13 @@ public class JoinDesc implements Serializable { // alias to filter mapping private Map<Byte, List<ExprNodeDesc>> filters; + // filter mapping of each outer-joined alias: flattened pairs of <target pos, filter count> + // for example, + // a left outer join b on a.k=b.k AND a.k>10 full outer join c on a.k=c.k AND a.k>10 AND c.k>20 + // --> filterMap[0] = [1, 1, 2, 1] (one filter on a against b, one against c) + // filterMap[2] = [0, 1] (one filter on c against the left side) + private int[][] filterMap; + // key index to nullsafe join flag private boolean[] nullsafes; @@ -118,6 +125,7 @@ public class JoinDesc implements Serializable { this.smallKeysDirMap = clone.smallKeysDirMap; this.tagOrder = clone.tagOrder; this.filters = clone.filters; + this.filterMap = clone.filterMap; } public Map<Byte, List<ExprNodeDesc>> getExprs() { @@ -387,4 +395,31 @@ public class JoinDesc implements Serializable { } return hasNS ? Arrays.toString(nullsafes) : null; } + + public int[][] getFilterMap() { + return filterMap; + } + + public void setFilterMap(int[][] filterMap) { + this.filterMap = filterMap; + } + + @Explain(displayName = "filter mappings", normalExplain = false) + public Map<Integer, String> getFilterMappingMap() { + return getFilterMappingMap(filterMap); + } + + protected Map<Integer, String> getFilterMappingMap(int[][] filterMap) { + if (filterMap == null) { + return null; + } + Map<Integer, String> result = new LinkedHashMap<Integer, String>(); + for (int i = 0; i < filterMap.length; i++) { + if (filterMap[i] == null) { + continue; + } + result.put(i, Arrays.toString(filterMap[i])); + } + return result.isEmpty() ?
null : result; + } } diff --git ql/src/test/queries/clientpositive/join_filters_overlap.q ql/src/test/queries/clientpositive/join_filters_overlap.q new file mode 100644 index 0000000..b0ca43c --- /dev/null +++ ql/src/test/queries/clientpositive/join_filters_overlap.q @@ -0,0 +1,27 @@ +-- HIVE-3411 Filter predicates on outer join overlapped on single alias is not handled properly + +create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3; + +-- overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60); +select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60); +select /*+ MAPJOIN(b,c)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60); + +-- overlap on b +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60); +select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60); +select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60); + +-- overlap on b with two filters for each +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60); +select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60); +select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60); + +-- overlap on a, b +explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40); +select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40); + +-- triple overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40); +select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40); +select /*+ MAPJOIN(b,c, d)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40); diff --git ql/src/test/results/clientpositive/auto_join29.q.out ql/src/test/results/clientpositive/auto_join29.q.out index aa80343..4d5efac 100644 --- 
ql/src/test/results/clientpositive/auto_join29.q.out +++ ql/src/test/results/clientpositive/auto_join29.q.out @@ -4461,33 +4461,33 @@ POSTHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL -NULL NULL 0 val_0 NULL NULL +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 NULL NULL 10 val_10 NULL NULL NULL NULL 100 val_100 NULL NULL NULL NULL 100 val_100 NULL NULL @@ -4603,7 +4603,7 @@ NULL NULL 197 val_197 NULL NULL NULL NULL 199 val_199 NULL NULL NULL NULL 199 val_199 NULL NULL NULL NULL 199 val_199 NULL NULL -NULL NULL 2 val_2 NULL NULL +NULL NULL 2 val_2 2 val_2 NULL NULL 20 val_20 NULL NULL NULL NULL 200 val_200 NULL NULL NULL NULL 200 val_200 NULL NULL @@ -4813,7 +4813,7 @@ NULL NULL 397 val_397 NULL NULL NULL NULL 397 val_397 NULL NULL NULL NULL 399 val_399 NULL NULL NULL NULL 399 val_399 NULL NULL -NULL NULL 4 val_4 NULL NULL +NULL NULL 4 val_4 4 val_4 NULL NULL 400 val_400 NULL NULL NULL NULL 401 val_401 NULL NULL NULL NULL 401 val_401 NULL NULL @@ -4937,33 +4937,33 @@ NULL NULL 497 val_497 NULL NULL NULL NULL 498 val_498 NULL NULL NULL NULL 498 val_498 NULL NULL NULL NULL 498 val_498 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL -NULL NULL 5 val_5 NULL NULL +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 
+NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 NULL NULL 51 val_51 NULL NULL NULL NULL 51 val_51 NULL NULL NULL NULL 53 val_53 NULL NULL @@ -4987,7 +4987,7 @@ NULL NULL 76 val_76 NULL NULL NULL NULL 76 val_76 NULL NULL NULL NULL 77 val_77 NULL NULL NULL NULL 78 val_78 NULL NULL -NULL NULL 8 val_8 NULL NULL +NULL NULL 8 val_8 8 val_8 NULL NULL 80 val_80 NULL NULL NULL NULL 82 val_82 NULL NULL NULL NULL 83 val_83 NULL NULL @@ -4997,7 +4997,7 @@ NULL NULL 84 val_84 NULL NULL NULL NULL 85 val_85 NULL NULL NULL NULL 86 val_86 NULL NULL NULL NULL 87 val_87 NULL NULL -NULL NULL 9 val_9 NULL NULL +NULL NULL 9 val_9 9 val_9 NULL NULL 90 val_90 NULL NULL NULL NULL 90 val_90 NULL NULL NULL NULL 90 val_90 NULL NULL diff --git ql/src/test/results/clientpositive/join_filters_overlap.q.out ql/src/test/results/clientpositive/join_filters_overlap.q.out new file mode 100644 index 0000000..813352e --- /dev/null +++ ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -0,0 +1,1088 @@ +PREHOOK: query: -- HIVE-3411 Filter predicates on outer join overlapped on single alias is not handled properly + +create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: -- HIVE-3411 Filter predicates on outer join overlapped on single alias is not handled properly + +create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@a +PREHOOK: query: -- overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +PREHOOK: type: QUERY +POSTHOOK: query: -- overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL a) value) 60)) (= (. 
(TOK_TABLE_OR_COL c) value) 60)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 0 + value expressions: + expr: key + type: int + expr: value + type: int + b + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (value = 50) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + value expressions: + expr: key + type: int + expr: value + type: int + c + TableScan + alias: c + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (value = 60) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 2 + value expressions: + expr: key + type: int + expr: value + type: int + Needs Tagging: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numPartitions 0 + numRows 3 + rawDataSize 18 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numPartitions 0 + numRows 3 + rawDataSize 18 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} + filter mappings: + 0 [1, 1, 2, 1] + filter predicates: + 0 {(VALUE._col1 = 50)} {(VALUE._col1 = 60)} + 1 + 2 + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: int + expr: _col8 + type: int + expr: _col9 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 40 NULL NULL NULL NULL +100 50 100 50 NULL NULL +100 60 NULL NULL 100 60 +PREHOOK: query: select /*+ MAPJOIN(b,c)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select /*+ MAPJOIN(b,c)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 40 NULL NULL NULL NULL +100 50 100 50 NULL NULL +100 60 NULL NULL 100 60 +PREHOOK: query: -- overlap on b +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +PREHOOK: type: QUERY +POSTHOOK: query: -- overlap on b +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL b) value) 60)) (= (. 
(TOK_TABLE_OR_COL c) value) 60)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (value = 50) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 0 + value expressions: + expr: key + type: int + expr: value + type: int + b + TableScan + alias: b + GatherStats: false + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + value expressions: + expr: key + type: int + expr: value + type: int + c + TableScan + alias: c + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (value = 60) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 2 + value expressions: + expr: key + type: int + expr: value + type: int + Needs Tagging: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numPartitions 0 + numRows 3 + rawDataSize 18 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numPartitions 0 + numRows 3 + rawDataSize 18 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} + filter mappings: + 1 [0, 1, 2, 1] + filter predicates: + 0 + 1 {(VALUE._col1 = 50)} {(VALUE._col1 = 60)} + 2 + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: int + expr: _col8 + type: int + expr: _col9 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4,_col5
+                    columns.types int:int:int:int:int:int
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+NULL	NULL	100	40	NULL	NULL
+100	50	100	50	NULL	NULL
+NULL	NULL	100	60	100	60
+PREHOOK: query: select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+NULL	NULL	100	40	NULL	NULL
+100	50	100	50	NULL	NULL
+NULL	NULL	100	60	100	60
+PREHOOK: query: -- overlap on b with two filters for each
+explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- overlap on b with two filters for each
+explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50)) (> (. (TOK_TABLE_OR_COL b) value) 10))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (AND (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL b) value) 60)) (> (. (TOK_TABLE_OR_COL b) value) 20)) (= (. (TOK_TABLE_OR_COL c) value) 60)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (value = 50)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: int
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: int
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: int
+        b 
+          TableScan
+            alias: b
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: int
+              tag: 1
+              value expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: int
+        c 
+          TableScan
+            alias: c
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (value = 60)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: int
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: int
+                tag: 2
+                value expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: int
+      Needs Tagging: true
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: a
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types int:int
+#### A masked pattern was here ####
+              name default.a
+              numFiles 1
+              numPartitions 0
+              numRows 3
+              rawDataSize 18
+              serialization.ddl struct a { i32 key, i32 value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 21
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types int:int
+#### A masked pattern was here ####
+                name default.a
+                numFiles 1
+                numPartitions 0
+                numRows 3
+                rawDataSize 18
+                serialization.ddl struct a { i32 key, i32 value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 21
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.a
+            name: default.a
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+               Left Outer Join1 to 2
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+            2 {VALUE._col0} {VALUE._col1}
+          filter mappings:
+            1 [0, 2, 2, 2]
+          filter predicates:
+            0 
+            1 {(VALUE._col1 = 50)} {(VALUE._col1 > 10)} {(VALUE._col1 = 60)} {(VALUE._col1 > 20)}
+            2 
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: int
+                  expr: _col4
+                  type: int
+                  expr: _col5
+                  type: int
+                  expr: _col8
+                  type: int
+                  expr: _col9
+                  type: int
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4,_col5
+                    columns.types int:int:int:int:int:int
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+NULL	NULL	100	40	NULL	NULL
+100	50	100	50	NULL	NULL
+NULL	NULL	100	60	100	60
+PREHOOK: query: select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+NULL	NULL	100	40	NULL	NULL
+100	50	100	50	NULL	NULL
+NULL	NULL	100	60	100	60
+PREHOOK: query: -- overlap on a, b
+explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- overlap on a, b
+explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL b) value) 60)) (= (. (TOK_TABLE_OR_COL c) value) 60))) (TOK_TABREF (TOK_TABNAME a) d) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL a) value) 40)) (= (. (TOK_TABLE_OR_COL d) value) 40)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: int
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: int
+        b 
+          TableScan
+            alias: b
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: int
+              tag: 1
+              value expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: int
+        c 
+          TableScan
+            alias: c
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (value = 60)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: int
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: int
+                tag: 3
+                value expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: int
+        d 
+          TableScan
+            alias: d
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (value = 40)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: int
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: int
+                tag: 2
+                value expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: int
+      Needs Tagging: true
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: a
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types int:int
+#### A masked pattern was here ####
+              name default.a
+              numFiles 1
+              numPartitions 0
+              numRows 3
+              rawDataSize 18
+              serialization.ddl struct a { i32 key, i32 value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 21
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types int:int
+#### A masked pattern was here ####
+                name default.a
+                numFiles 1
+                numPartitions 0
+                numRows 3
+                rawDataSize 18
+                serialization.ddl struct a { i32 key, i32 value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 21
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.a
+            name: default.a
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Outer Join 0 to 1
+               Left Outer Join0 to 2
+               Left Outer Join1 to 3
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+            2 {VALUE._col0} {VALUE._col1}
+            3 {VALUE._col0} {VALUE._col1}
+          filter mappings:
+            0 [1, 1, 2, 1]
+            1 [0, 1, 3, 1]
+          filter predicates:
+            0 {(VALUE._col1 = 50)} {(VALUE._col1 = 40)}
+            1 {(VALUE._col1 = 50)} {(VALUE._col1 = 60)}
+            2 
+            3 
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: int
+                  expr: _col4
+                  type: int
+                  expr: _col5
+                  type: int
+                  expr: _col12
+                  type: int
+                  expr: _col13
+                  type: int
+                  expr: _col8
+                  type: int
+                  expr: _col9
+                  type: int
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+                    columns.types int:int:int:int:int:int:int:int
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+100	40	NULL	NULL	NULL	NULL	100	40
+NULL	NULL	100	40	NULL	NULL	NULL	NULL
+100	40	NULL	NULL	NULL	NULL	100	40
+100	40	NULL	NULL	NULL	NULL	100	40
+NULL	NULL	100	60	100	60	NULL	NULL
+100	50	NULL	NULL	NULL	NULL	NULL	NULL
+NULL	NULL	100	40	NULL	NULL	NULL	NULL
+100	50	100	50	NULL	NULL	NULL	NULL
+100	50	NULL	NULL	NULL	NULL	NULL	NULL
+NULL	NULL	100	60	100	60	NULL	NULL
+100	60	NULL	NULL	NULL	NULL	NULL	NULL
+NULL	NULL	100	40	NULL	NULL	NULL	NULL
+100	60	NULL	NULL	NULL	NULL	NULL	NULL
+100	60	NULL	NULL	NULL	NULL	NULL	NULL
+NULL	NULL	100	60	100	60	NULL	NULL
+PREHOOK: query: -- triple overlap on a
+explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- triple overlap on a
+explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL a) value) 60)) (= (. (TOK_TABLE_OR_COL c) value) 60))) (TOK_TABREF (TOK_TABNAME a) d) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL a) value) 40)) (= (. (TOK_TABLE_OR_COL d) value) 40)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: int
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: int
+        b 
+          TableScan
+            alias: b
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (value = 50)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: int
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: int
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: int
+        c 
+          TableScan
+            alias: c
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (value = 60)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: int
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: int
+                tag: 2
+                value expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: int
+        d 
+          TableScan
+            alias: d
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (value = 40)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: int
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: int
+                tag: 3
+                value expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: int
+      Needs Tagging: true
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: a
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types int:int
+#### A masked pattern was here ####
+              name default.a
+              numFiles 1
+              numPartitions 0
+              numRows 3
+              rawDataSize 18
+              serialization.ddl struct a { i32 key, i32 value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 21
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types int:int
+#### A masked pattern was here ####
+                name default.a
+                numFiles 1
+                numPartitions 0
+                numRows 3
+                rawDataSize 18
+                serialization.ddl struct a { i32 key, i32 value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 21
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.a
+            name: default.a
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+               Left Outer Join0 to 2
+               Left Outer Join0 to 3
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+            2 {VALUE._col0} {VALUE._col1}
+            3 {VALUE._col0} {VALUE._col1}
+          filter mappings:
+            0 [1, 1, 2, 1, 3, 1]
+          filter predicates:
+            0 {(VALUE._col1 = 50)} {(VALUE._col1 = 60)} {(VALUE._col1 = 40)}
+            1 
+            2 
+            3 
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: int
+                  expr: _col4
+                  type: int
+                  expr: _col5
+                  type: int
+                  expr: _col8
+                  type: int
+                  expr: _col9
+                  type: int
+                  expr: _col12
+                  type: int
+                  expr: _col13
+                  type: int
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+                    columns.types int:int:int:int:int:int:int:int
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+100	40	NULL	NULL	NULL	NULL	100	40
+100	50	100	50	NULL	NULL	NULL	NULL
+100	60	NULL	NULL	100	60	NULL	NULL
+PREHOOK: query: select /*+ MAPJOIN(b,c, d)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ MAPJOIN(b,c, d)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+100	40	NULL	NULL	NULL	NULL	100	40
+100	50	100	50	NULL	NULL	NULL	NULL
+100	60	NULL	NULL	100	60	NULL	NULL
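
Editor's note on reading the "filter mappings" sections above: the arrays appear to be flattened (target alias, filter count) pairs. For example, "0 [1, 1, 2, 1, 3, 1]" in the triple-overlap plan lines up with one predicate guarding each of alias 0's joins to aliases 1, 2, and 3, while "1 [0, 2, 2, 2]" in the two-filters-each plan lines up with two predicates toward alias 0 and two toward alias 2. The sketch below is illustrative only, not Hive's implementation: it decodes the arrays under that pairing assumption, and the bitmask reading of the per-row ByteWritable filter tag (one bit per join target, set when the row fails that join's filters) is likewise an assumption; the names FilterMappingSketch, describe, and isFiltered are hypothetical.

// FilterMappingSketch.java -- minimal sketch decoding the plan output above.
public class FilterMappingSketch {

    // Interprets one "filter mappings" entry as flattened
    // (targetAlias, filterCount) pairs and prints the pairing.
    static void describe(int sourceAlias, int[] mapping) {
        for (int i = 0; i + 1 < mapping.length; i += 2) {
            int target = mapping[i];        // alias the filters guard a join against
            int filterCount = mapping[i + 1]; // number of predicates for that join
            System.out.printf("alias %d: %d filter(s) apply to the join with alias %d%n",
                sourceAlias, filterCount, target);
        }
    }

    // Assumed bitmask check: bit `targetAlias` of the row's filter tag is set
    // when the row failed the filters for the join against that alias.
    static boolean isFiltered(byte filterTag, int targetAlias) {
        return (filterTag & (1 << targetAlias)) != 0;
    }

    public static void main(String[] args) {
        // "filter mappings: 0 [1, 1, 2, 1, 3, 1]" from the triple-overlap plan
        describe(0, new int[] {1, 1, 2, 1, 3, 1});
        // "filter mappings: 1 [0, 2, 2, 2]" from the two-filters-each plan
        describe(1, new int[] {0, 2, 2, 2});
        // Tag 0b101: row fails the filters toward aliases 0 and 2 but not 1.
        System.out.println(isFiltered((byte) 0b101, 2)); // true
    }
}

Under this reading, a single byte per row is enough to record pass/fail against up to eight join targets, which is why the dummy-row and RowContainer setup in the operator diff switches the trailing column from a BooleanWritable to a ByteWritable.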