diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index d0895e1..3c94b42 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1279,6 +1279,16 @@
   public static String getColumnInternalName(int pos) {
     return "_col" + pos;
   }
+  public static ArrayList<String> getColumnInternalNames(int... colNums) {
+    ArrayList<String> outputColumnNames = new ArrayList<String>();
+    for (int i = 0; i < colNums.length; i++) {
+      // each group of columns (e.g. keys, then values) restarts from _col0
+      for (int j = 0; j < colNums[i]; j++) {
+        outputColumnNames.add(getColumnInternalName(j));
+      }
+    }
+    return outputColumnNames;
+  }
+
   public static int getPositionFromInternalName(String internalName) {
     Pattern internalPattern = Pattern.compile("_col([0-9]+)");
     Matcher m = internalPattern.matcher(internalName);
diff --git a/data/files/kv9.txt b/data/files/kv9.txt
new file mode 100644
index 0000000..f43d943
--- /dev/null
+++ b/data/files/kv9.txt
@@ -0,0 +1,25 @@
+238val_238
+238val_239
+86val_86
+238val_240
+311val_311
+27val_27
+165val_165
+213val_213
+409val_409
+255val_255
+278val_278
+98val_98
+484val_484
+265val_265
+213val_214
+193val_193
+401val_401
+150val_150
+273val_273
+224val_224
+369val_369
+66val_66
+128val_128
+213val_213
+200val_215
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index b575e22..f5bc98d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -3065,5 +3065,9 @@ private static void createTmpDirs(Configuration conf,
       }
     }
   }
 
+  public static String getColumnInternalName(ReduceField field, int pos) {
+    return field.toString() + "." + HiveConf.getColumnInternalName(pos);
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java
index 476af4b..d6b74df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java
@@ -99,7 +99,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 
       for (AggregationDesc aggregation : cGBY.getConf().getAggregators()) {
         List<ExprNodeDesc> parameters = aggregation.getParameters();
-        aggregation.setParameters(ExprNodeDescUtils.backtrack(parameters, cGBY, pGBY));
+        aggregation.setParameters(ExprNodeDescUtils.backtrack(parameters, cGBY, RS));
       }
 
       pctx.setFetchTabledesc(tsDesc);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
index 98fcff5..07de0b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
@@ -27,6 +27,7 @@
 import java.util.Map;
 import java.util.Map.Entry;
 
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
@@ -41,18 +42,22 @@
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import static org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
 
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
@@ -490,4 +495,137 @@ protected static void removeOperator(Operator target, Operator child, Oper
     context.getOpParseCtx().put(op, ctx);
     return op;
   }
+
+  /**
+   * Replace the keyCols of pRS with the keyCols of cRS; keyCols of pRS that do not
+   * appear among the keyCols of cRS are moved to the valCols of pRS.
+   * @param pRS
+   * @param cRS
+   * @throws SemanticException
+   */
+  public static void replace(ReduceSinkOperator pRS, ReduceSinkOperator cRS, ParseContext context)
+      throws SemanticException {
+    ReduceSinkDesc pRSConf = pRS.getConf(), cRSConf = cRS.getConf();
+    ArrayList<ExprNodeDesc> oldKeyExprs = pRSConf.getKeyCols(), valExprs = pRSConf.getValueCols();
+    Map<ExprNodeDesc.ExprNodeDescEqualityWrapper, Integer> oldKeyExprMap =
+        new HashMap<ExprNodeDesc.ExprNodeDescEqualityWrapper, Integer>();
+    Map<ExprNodeDesc.ExprNodeDescEqualityWrapper, Integer> oldValExprMap =
+        new HashMap<ExprNodeDesc.ExprNodeDescEqualityWrapper, Integer>();
+    // key columns that need to be changed: key = old key position, value = [new position, KEY/VALUE]
+    Map<Integer, Object[]> change = new HashMap<Integer, Object[]>();
+    Map<String, String> colNameChangeMap = new HashMap<String, String>();
+    ArrayList<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
+    String newOrder = "";
= ""; + for (int i = 0; i < cRSConf.getKeyCols().size(); i++) { + ExprNodeDesc keyExpr = ExprNodeDescUtils.backtrack(cRSConf.getKeyCols().get(i), cRS, pRS); + if (keyExpr != null) { + keyExprs.add(keyExpr); + newOrder += cRSConf.getOrder().charAt(i); + } + } + // newRR consists of three parts: newKeyCol, oldValueCol, newValueCol + RowResolver newRR = new RowResolver(), oldpRSRR = context.getOpParseCtx().get(pRS) + .getRowResolver(); + ArrayList oldColumnInfos = oldpRSRR.getColumnInfos(); + for (int pos = 0; pos < oldKeyExprs.size(); pos++) { + oldKeyExprMap.put(new ExprNodeDesc.ExprNodeDescEqualityWrapper(oldKeyExprs.get(pos)), pos); + change.put(pos, new Object[]{null, ReduceField.VALUE}); + } + for (int i = 0; i < valExprs.size(); i++) { + oldValExprMap.put(new ExprNodeDesc.ExprNodeDescEqualityWrapper(valExprs.get(i)), i); + } + for (int i = 0; i < keyExprs.size(); i++) { + ExprNodeDesc.ExprNodeDescEqualityWrapper exprWrapper = new ExprNodeDesc + .ExprNodeDescEqualityWrapper(keyExprs.get(i)); + if (oldKeyExprMap.containsKey(exprWrapper)) { + Integer keyPos = oldKeyExprMap.get(exprWrapper); + if (i == keyPos) { + change.remove(oldKeyExprMap.get(exprWrapper)); + oldKeyExprMap.remove(exprWrapper); + addColumnInfo(newRR, oldpRSRR, oldColumnInfos.get(keyPos).getInternalName()); + } else { + Object[] newCol = change.get(keyPos); + newCol[0] = i; + newCol[1] = ReduceField.KEY; + addColumnInfo(newRR, oldpRSRR, oldColumnInfos.get(keyPos).getInternalName(), + Utilities.getColumnInternalName(ReduceField.KEY, i)); + } + } + } + addColumnInfo(newRR, oldpRSRR, oldColumnInfos.subList(oldKeyExprs.size(), + oldColumnInfos.size())); + for (Map.Entry entry : change.entrySet()) { + Integer keyPos = entry.getKey(); + String oldKeyOutPutName = oldColumnInfos.get(keyPos).getInternalName(); + Object[] newCol = entry.getValue(); + ExprNodeDesc oldRSExpr = pRS.getColumnExprMap().get(oldColumnInfos.get(keyPos) + .getInternalName()); + if (newCol[1] == ReduceField.VALUE) { + ExprNodeDesc.ExprNodeDescEqualityWrapper exprWrapper = new ExprNodeDesc + .ExprNodeDescEqualityWrapper(oldRSExpr); + if (!oldValExprMap.containsKey(exprWrapper)) { + addColumnInfo(newRR, oldpRSRR, oldKeyOutPutName, Utilities.getColumnInternalName + (ReduceField.VALUE, valExprs.size())); + newCol[0] = valExprs.size(); + valExprs.add(oldRSExpr); + } else { + newCol[0] = oldValExprMap.get(exprWrapper); + } + } + colNameChangeMap.put(oldKeyOutPutName, Utilities.getColumnInternalName((ReduceField) + newCol[1], (Integer) newCol[0])); + } + pRS.setSchema(new RowSchema(newRR.getColumnInfos())); + context.getOpParseCtx().get(pRS).setRowResolver(newRR); + pRS.setConf(PlanUtils.getReduceSinkDesc(keyExprs, valExprs, pRSConf.isIncludeKeyCols() ? 
+    PlanUtils.genReduceSinkExprMap(pRS);
+    for (Operator<? extends OperatorDesc> cOp : pRS.getChildOperators()) {
+      if (cOp instanceof GroupByOperator) {
+        GroupByDesc conf = ((GroupByOperator) cOp).getConf();
+        ExprNodeDescUtils.replaceColumnName(conf.getKeys(), colNameChangeMap);
+        for (AggregationDesc aggregationDesc : conf.getAggregators()) {
+          ExprNodeDescUtils.replaceColumnName(aggregationDesc.getParameters(), colNameChangeMap);
+        }
+      }
+      Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();
+      if (cOp.getColumnExprMap() != null) {
+        for (Map.Entry<String, ExprNodeDesc> entry : cOp.getColumnExprMap().entrySet()) {
+          ExprNodeColumnDesc expr = (ExprNodeColumnDesc) entry.getValue();
+          if (colNameChangeMap.containsKey(expr.getExprString())) {
+            newColExprMap.put(entry.getKey(), new ExprNodeColumnDesc(expr.getTypeInfo(),
+                colNameChangeMap.get(expr.getExprString()), expr.getTabAlias(),
+                expr.isSkewedCol()));
+          } else {
+            newColExprMap.put(entry.getKey(), entry.getValue());
+          }
+        }
+      }
+      cOp.setColumnExprMap(newColExprMap);
+    }
+  }
+
+  public static void addColumnInfo(RowResolver newRR, RowResolver oldRR, List<ColumnInfo> colInfos)
+      throws SemanticException {
+    for (ColumnInfo col : colInfos) {
+      addColumnInfo(newRR, oldRR, col.getInternalName());
+    }
+  }
+
+  public static void addColumnInfo(RowResolver newRR, RowResolver oldRR, String colName)
+      throws SemanticException {
+    addColumnInfo(newRR, oldRR, colName, colName);
+  }
+
+  public static void addColumnInfo(RowResolver newRR, RowResolver oldRR, String colName,
+      String newColName) throws SemanticException {
+    String[] nm = oldRR.reverseLookup(colName);
+    ColumnInfo colInfo = oldRR.get(nm[0], nm[1]);
+    ColumnInfo newColInfo = new ColumnInfo(newColName, colInfo.getObjectInspector(),
+        colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+    newRR.put(nm[0], nm[1], newColInfo);
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
index b206448..4a6d868 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
@@ -22,12 +22,13 @@
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASK;
 
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
-import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
@@ -239,22 +240,17 @@ protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReduc
      * If parent RS has not been assigned any partitioning column, we will use
      * partitioning columns (if exist) of child RS.
     */
-    protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
+    protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer,
+        ParseContext context)
         throws SemanticException {
       int[] result = checkStatus(cRS, pRS, minReducer);
       if (result == null) {
         return false;
       }
-
-      if (result[0] > 0) {
-        // The sorting columns of the child RS are more specific than
-        // those of the parent RS. Assign sorting columns of the child RS
-        // to the parent RS.
-        List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
-        pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
+      if (pRS.getConf().getDistinctColumnIndices().size() == 0) {
+        CorrelationUtilities.replace(pRS, cRS, context);
       }
-
-      if (result[1] < 0) {
+      if (result[0] < 0) {
         // The partitioning columns of the parent RS are more specific than
         // those of the child RS.
         List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
@@ -263,7 +259,7 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
           // assign these to the partitioning columns of the parent RS.
           pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
         }
-      } else if (result[1] > 0) {
+      } else if (result[0] > 0) {
        // The partitioning columns of the child RS are more specific than
        // those of the parent RS.
        List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
@@ -275,21 +271,7 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
         }
       }
 
-      if (result[2] > 0) {
-        // The sorting order of the child RS is more specific than
-        // that of the parent RS. Assign the sorting order of the child RS
-        // to the parent RS.
-        if (result[0] <= 0) {
-          // Sorting columns of the parent RS are more specific than those of the
-          // child RS but Sorting order of the child RS is more specific than
-          // that of the parent RS.
-          throw new SemanticException("Sorting columns and order don't match. " +
-              "Try set " + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION + "=false;");
-        }
-        pRS.getConf().setOrder(cRS.getConf().getOrder());
-      }
-
-      if (result[3] > 0) {
+      if (result[1] > 0) {
         // The number of reducers of the child RS is more specific than
         // that of the parent RS. Assign the number of reducers of the child RS
         // to the parent RS.
@@ -300,7 +282,7 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
     }
 
     /**
-     * Returns merge directions between two RSs for criterias (ordering, number of reducers,
+     * Returns merge directions between two RSs for criteria (number of reducers,
      * reducer keys, partition keys). Returns null if any of categories is not mergeable.
     *
     * Values for each index can be -1, 0, 1
@@ -312,10 +294,6 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
         throws SemanticException {
       ReduceSinkDesc cConf = cRS.getConf();
       ReduceSinkDesc pConf = pRS.getConf();
-      Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder());
-      if (moveRSOrderTo == null) {
-        return null;
-      }
       Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
       if (moveReducerNumTo == null ||
           moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
@@ -323,17 +301,17 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
       }
 
       List<ExprNodeDesc> ckeys = cConf.getKeyCols();
       List<ExprNodeDesc> pkeys = pConf.getKeyCols();
-      Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
-      if (moveKeyColTo == null) {
+      if (checkExprs(ckeys, pkeys, cRS, pRS) == null) {
         return null;
       }
+
       List<ExprNodeDesc> cpars = cConf.getPartitionCols();
       List<ExprNodeDesc> ppars = pConf.getPartitionCols();
       Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
       if (movePartitionColTo == null) {
         return null;
       }
-      return new int[] {moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo};
+      return new int[] {movePartitionColTo, moveReducerNumTo};
     }
 
     /**
@@ -367,23 +345,34 @@ private Integer checkExprs(List<ExprNodeDesc> ckeys, List<ExprNodeDesc> pkeys,
 
     // backtrack key exprs of child to parent and compare it with parent's
     protected Integer sameKeys(List<ExprNodeDesc> cexprs, List<ExprNodeDesc> pexprs,
        Operator<?> child, Operator<?> parent) throws SemanticException {
-      int common = Math.min(cexprs.size(), pexprs.size());
-      int limit = Math.max(cexprs.size(), pexprs.size());
-      int i = 0;
-      for (; i < common; i++) {
-        ExprNodeDesc pexpr = pexprs.get(i);
-        ExprNodeDesc cexpr = ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent);
-        if (cexpr == null || !pexpr.isSame(cexpr)) {
-          return null;
-        }
-      }
-      for (; i < limit; i++) {
-        if (cexprs.size() > pexprs.size()) {
-          if (ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent) == null) {
-            // cKey is not present in parent
+      Set<ExprNodeDesc.ExprNodeDescEqualityWrapper> pExprNodeWrapperSet =
+          new HashSet<ExprNodeDesc.ExprNodeDescEqualityWrapper>(pexprs.size());
+      Set<ExprNodeDesc.ExprNodeDescEqualityWrapper> cExprNodeWrapperSet =
+          new HashSet<ExprNodeDesc.ExprNodeDescEqualityWrapper>(cexprs.size());
+      for (int i = 0; i < cexprs.size(); i++) {
+        ExprNodeDesc expr = ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent);
+        if (expr == null) {
+          // expr is the result of an aggregation function
+          if (i < pexprs.size()) {
+            // consider a query
+            //   select key, sum(key) as value from src group by key order by value, key;
+            // if such an expr appears before the group-by keys, it can't be optimized
             return null;
           }
         }
+        cExprNodeWrapperSet.add(new ExprNodeDesc.ExprNodeDescEqualityWrapper(expr));
+      }
+      for (ExprNodeDesc pexpr : pexprs) {
+        pExprNodeWrapperSet.add(new ExprNodeDesc.ExprNodeDescEqualityWrapper(pexpr));
+      }
+
+      if (cexprs.size() >= pexprs.size()) {
+        if (!cExprNodeWrapperSet.containsAll(pExprNodeWrapperSet)) {
+          return null;
+        }
+      } else {
+        if (!pExprNodeWrapperSet.containsAll(cExprNodeWrapperSet)) {
+          return null;
+        }
       }
       return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
     }
@@ -445,7 +434,7 @@ public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedup
       ReduceSinkOperator pRS = CorrelationUtilities.findPossibleParent(
           pGBY, ReduceSinkOperator.class, dedupCtx.trustScript());
-      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
+      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer(), dedupCtx.getPctx())) {
         CorrelationUtilities.replaceReduceSinkWithSelectOperator(
             cRS, dedupCtx.getPctx(), dedupCtx);
         return true;
@@ -467,7 +456,7 @@ public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY,
       }
       ReduceSinkOperator pRS =
           CorrelationUtilities.getSingleParent(pGBY, ReduceSinkOperator.class);
-      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
+      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer(), dedupCtx.getPctx())) {
         CorrelationUtilities.removeReduceSinkForGroupBy(
             cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
         return true;
@@ -521,7 +510,7 @@ public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedup
       ReduceSinkOperator pRS = CorrelationUtilities.findPossibleParent(
           cRS, ReduceSinkOperator.class, dedupCtx.trustScript());
-      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
+      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer(), dedupCtx.getPctx())) {
         CorrelationUtilities.replaceReduceSinkWithSelectOperator(
             cRS, dedupCtx.getPctx(), dedupCtx);
         return true;
@@ -538,7 +527,7 @@ public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY,
       ReduceSinkOperator pRS = CorrelationUtilities.findPossibleParent(
           start, ReduceSinkOperator.class, dedupCtx.trustScript());
-      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
+      if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer(), dedupCtx.getPctx())) {
         CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
         return true;
       }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index c34b261..746a687 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -5864,6 +5864,8 @@ private Operator genReduceSinkPlan(String dest, QB qb, Operator input,
         Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema(
         out_rwsch.getColumnInfos()), interim), out_rwsch);
 
+    output.setColumnExprMap(colExprMap);
+
     if (LOG.isDebugEnabled()) {
       LOG.debug("Created ReduceSink Plan for clause: " + dest + " row schema: "
           + out_rwsch.toString());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
index f8738cd..dd19788 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
@@ -21,6 +21,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.lang.builder.HashCodeBuilder;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
@@ -84,4 +85,17 @@ public String getTypeString() {
   public List<ExprNodeDesc> getChildren() {
     return new ArrayList<ExprNodeDesc>(columns);
   }
+
+  public void setCols(List<ExprNodeDesc> cols) {
+    this.columns = cols;
+  }
+
+  @Override
+  public int hashCode() {
+    int superHashCode = super.hashCode();
+    HashCodeBuilder builder = new HashCodeBuilder();
+    builder.appendSuper(superHashCode);
+    builder.append(columns);
+    return builder.toHashCode();
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 635143d..cfceee2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -182,7 +182,10 @@ public static boolean isDeterministic(ExprNodeDesc desc) {
       Operator<?> current, Operator<?> terminal) throws SemanticException {
     ArrayList<ExprNodeDesc> result = new ArrayList<ExprNodeDesc>();
     for (ExprNodeDesc expr : sources) {
-      result.add(backtrack(expr, current, terminal));
+      ExprNodeDesc exprNodeDesc = backtrack(expr, current, terminal);
+      if (exprNodeDesc != null) {
+        result.add(exprNodeDesc);
+      }
     }
     return result;
   }
@@ -218,7 +221,7 @@ private static ExprNodeDesc backtrack(ExprNodeColumnDesc column, Operator cur
       Operator<?> terminal) throws SemanticException {
     Map<String, ExprNodeDesc> mapping = current.getColumnExprMap();
     if (mapping == null || !mapping.containsKey(column.getColumn())) {
-      return backtrack((ExprNodeDesc)column, current, terminal);
+      return null;
     }
     ExprNodeDesc mapped = mapping.get(column.getColumn());
     return backtrack(mapped, current, terminal);
@@ -244,4 +247,49 @@ private static ExprNodeDesc backtrack(ExprNodeColumnDesc column, Operator cur
     }
     throw new SemanticException("Met multiple parent operators");
   }
+
+  public static void replaceColumnName(ArrayList<ExprNodeDesc> targets,
+      Map<String, String> old2newNameMap) {
+    for (int i = 0; i < targets.size(); i++) {
+      targets.set(i, replaceColumnName(targets.get(i), old2newNameMap));
+    }
+  }
+
+  /**
+   * Rename every column reference in expr according to old2newNameMap.
+   *
+   * @param expr the expression to rewrite
+   * @param old2newNameMap mapping from old internal column names to new ones
+   * @return the rewritten expression
+   */
+  public static ExprNodeDesc replaceColumnName(ExprNodeDesc expr,
+      Map<String, String> old2newNameMap) {
+    if (expr instanceof ExprNodeColumnDesc) {
+      ExprNodeColumnDesc exprNodeColumnDesc = (ExprNodeColumnDesc) expr;
+      if (old2newNameMap.containsKey(exprNodeColumnDesc.getColumn())) {
+        return new ExprNodeColumnDesc(exprNodeColumnDesc.getTypeInfo(),
+            old2newNameMap.get(exprNodeColumnDesc.getColumn()), exprNodeColumnDesc.getTabAlias(),
+            exprNodeColumnDesc.getIsPartitionColOrVirtualCol(), exprNodeColumnDesc.isSkewedCol());
+      }
+    } else if (expr instanceof ExprNodeFieldDesc) {
+      ExprNodeFieldDesc exprNodeFieldDesc = (ExprNodeFieldDesc) expr;
+      exprNodeFieldDesc.setDesc(replaceColumnName(exprNodeFieldDesc.getDesc(), old2newNameMap));
+    } else if (expr instanceof ExprNodeGenericFuncDesc) {
+      ExprNodeGenericFuncDesc genericFuncDesc = (ExprNodeGenericFuncDesc) expr;
+      List<ExprNodeDesc> newChildExprs = new ArrayList<ExprNodeDesc>();
+      for (ExprNodeDesc childExpr : genericFuncDesc.getChildExprs()) {
+        newChildExprs.add(replaceColumnName(childExpr, old2newNameMap));
+      }
+      genericFuncDesc.setChildExprs(newChildExprs);
+    } else if (expr instanceof ExprNodeColumnListDesc) {
+      ExprNodeColumnListDesc exprNodeColumnListDesc = (ExprNodeColumnListDesc) expr;
+      List<ExprNodeDesc> newCols = new ArrayList<ExprNodeDesc>();
+      for (ExprNodeDesc exprNodeDesc : exprNodeColumnListDesc.getChildren()) {
+        newCols.add((ExprNodeColumnDesc) replaceColumnName(exprNodeDesc, old2newNameMap));
+      }
+      exprNodeColumnListDesc.setCols(newCols);
+    }
+    return expr;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index b6097b1..a0839f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -21,6 +21,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -33,6 +34,7 @@
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -622,7 +624,7 @@ public static ReduceSinkDesc getReduceSinkDesc(
         valueCols, outputValueColumnNames, 0, ""));
outputValueColumnNames, 0, "")); outputValCols.addAll(outputValueColumnNames); return new ReduceSinkDesc(keyCols, numKeys, valueCols, outputKeyCols, - distinctColIndices, outputValCols, + distinctColIndices, outputValCols, includeKeyCols, tag, partitionCols, numReducers, keyTable, valueTable); } @@ -873,4 +875,24 @@ public static ReadEntity addInput(Set inputs, ReadEntity newInput) { // make compile happy return null; } + + public static void genReduceSinkExprMap(ReduceSinkOperator rs) { + ReduceSinkDesc conf = rs.getConf(); + Map exprMap = new HashMap(); + if (rs.getConf().isIncludeKeyCols()) { + for (int i = 0; i < conf.getKeyCols().size(); i++) { + exprMap.put(Utilities.getColumnInternalName(Utilities.ReduceField.KEY, i), + conf.getKeyCols().get(i)); + } + for (int i = 0; i < conf.getValueCols().size(); i++) { + exprMap.put(Utilities.getColumnInternalName(Utilities.ReduceField.VALUE, i), + conf.getValueCols().get(i)); + } + } else { + for (int i = 0; i < conf.getValueCols().size(); i++) { + exprMap.put(HiveConf.getColumnInternalName(i), conf.getValueCols().get(i)); + } + } + rs.setColumnExprMap(exprMap); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index 5837fac..7b142a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -72,6 +72,8 @@ private float topNMemoryUsage = -1; private boolean mapGroupBy; // for group-by, values with same key on top-K should be forwarded + private boolean includeKeyCols; + public ReduceSinkDesc() { } @@ -80,7 +82,8 @@ public ReduceSinkDesc(ArrayList keyCols, ArrayList valueCols, ArrayList outputKeyColumnNames, List> distinctColumnIndices, - ArrayList outputValueColumnNames, int tag, + ArrayList outputValueColumnNames, + boolean includeKeyCols, int tag, ArrayList partitionCols, int numReducers, final TableDesc keySerializeInfo, final TableDesc valueSerializeInfo) { this.keyCols = keyCols; @@ -88,6 +91,7 @@ public ReduceSinkDesc(ArrayList keyCols, this.valueCols = valueCols; this.outputKeyColumnNames = outputKeyColumnNames; this.outputValueColumnNames = outputValueColumnNames; + this.includeKeyCols = includeKeyCols; this.tag = tag; this.numReducers = numReducers; this.partitionCols = partitionCols; @@ -273,4 +277,12 @@ public void setDistinctColumnIndices( List> distinctColumnIndices) { this.distinctColumnIndices = distinctColumnIndices; } + + public boolean isIncludeKeyCols() { + return includeKeyCols; + } + + public void setIncludeKeyCols(boolean includeKeyCols) { + this.includeKeyCols = includeKeyCols; + } } diff --git a/ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q b/ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q new file mode 100644 index 0000000..8746cde --- /dev/null +++ b/ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q @@ -0,0 +1,46 @@ +set hive.optimize.reducededuplication=true; +set hive.optimize.reducededuplication.min.reducer=1; +set hive.map.aggr=true; + +-- HIVE-2340 deduplicate RS followed by RS +-- hive.optimize.reducededuplication : wherther using this optimization +-- hive.optimize.reducededuplication.min.reducer : number of reducer of deduped RS should be this at least + +create table src9 (key string, value string); +load data local inpath '../data/files/kv9.txt' into table src9; + +-- RS-mGBY-RS-rGBY +explain select key, value from (select key, value from src9 order by key, value) t group 
by value, key; +-- mGBY-RS-rGBY-RS +-- should not be optimized +explain select key, sum(key) as value from src9 group by key order by value, key; +explain select key, value from (select key, value from src9 group by key, value) t order by key desc, value; +explain select key, value from (select key, value from src9 group by key, value) t order by value, key; +-- mGBY-RS-rGBY-mGBY-RS-rGBY +explain select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4; + +select key, value from (select key, value from src9 order by key, value) t group by value, key; +select key, sum(key) as value from src9 group by key order by value, key; +select key, value from (select key, value from src9 group by key, value) t order by key desc, value; +select key, value from (select key, value from src9 group by key, value) t order by value, key; +select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4; + +set hive.map.aggr=false; + +-- RS-RS-GBY +explain select key, value from (select key, value from src9 order by key, value) t group by value, key; +-- RS-GBY-RS +-- should not be optimized +explain select key, sum(key) as value from src9 group by key order by value, key; +explain select key, value from (select key, value from src9 group by key, value) t order by key desc, value; +explain select key, value from (select key, value from src9 group by key, value) t order by value, key; +-- RS-GBY-RS-GBY +explain select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4; + +select key, value from (select key, value from src9 order by key, value) t group by value, key; +select key, sum(key) as value from src9 group by key order by value, key; +select key, value from (select key, value from src9 group by key, value) t order by key desc, value; +select key, value from (select key, value from src9 group by key, value) t order by value, key; +select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4; + +drop table src9; diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out index 0a27b8a..5c2238e 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -2007,6 +2007,8 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, 
type:string, comment:default), ] @@ -2032,8 +2034,6 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSche POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -2055,6 +2055,8 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] @@ -2080,8 +2082,6 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSche POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -2120,9 +2120,9 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: 1 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key @@ -2145,6 
+2145,8 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] @@ -2172,8 +2174,6 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -2195,6 +2195,8 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] @@ -2222,8 +2224,6 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), 
] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -2287,6 +2287,8 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] @@ -2316,8 +2318,6 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -2339,6 +2339,8 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] @@ -2368,8 +2370,6 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -2433,6 +2433,8 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] @@ -2464,8 +2466,6 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -2487,6 +2487,8 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, 
comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] @@ -2518,8 +2520,6 @@ POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 2a762b3..261898f 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -1972,7 +1972,7 @@ STAGE PLANS: type: double expr: _col2 type: string - sort order: ++++ + sort order: +++ Map-reduce partition columns: expr: _col0 type: string diff --git a/ql/src/test/results/clientpositive/ppd2.q.out b/ql/src/test/results/clientpositive/ppd2.q.out index ecb824e..9955a5d 100644 --- a/ql/src/test/results/clientpositive/ppd2.q.out +++ b/ql/src/test/results/clientpositive/ppd2.q.out @@ -60,9 +60,7 @@ STAGE PLANS: key expressions: expr: _col0 type: string - expr: _col1 - type: bigint - sort order: +- + sort order: + Map-reduce partition columns: expr: _col0 type: string @@ -532,9 +530,7 @@ STAGE PLANS: key expressions: expr: _col0 type: string - expr: _col1 - type: bigint - sort order: +- + sort order: + Map-reduce partition columns: expr: _col0 type: string diff --git a/ql/src/test/results/clientpositive/reduce_deduplicate.q.out b/ql/src/test/results/clientpositive/reduce_deduplicate.q.out index a02d4c7..74f6878 100644 --- a/ql/src/test/results/clientpositive/reduce_deduplicate.q.out +++ b/ql/src/test/results/clientpositive/reduce_deduplicate.q.out @@ -35,10 +35,7 @@ STAGE PLANS: type: string outputColumnNames: _col0, _col1 Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + + sort order: Map-reduce partition columns: expr: _col0 type: string diff --git a/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out b/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out index 407afaa..9a83156 100644 --- a/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out +++ b/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out @@ -37,9 +37,7 @@ STAGE PLANS: key expressions: expr: _col0 type: string - expr: _col1 - type: string - sort order: ++ + sort order: + Map-reduce partition columns: expr: _col0 type: string @@ -293,9 +291,7 @@ STAGE PLANS: key expressions: expr: _col0 type: string - expr: _col1 - type: 
double
-              sort order: ++
+              sort order: +
               Map-reduce partition columns:
                     expr: _col0
                     type: string
@@ -612,20 +608,21 @@ STAGE PLANS:
               key expressions:
                     expr: _col0
                     type: string
-                    expr: _col1
-                    type: string
-              sort order: ++
+              sort order: +
               Map-reduce partition columns:
                     expr: _col0
                     type: string
               tag: -1
+              value expressions:
+                    expr: _col1
+                    type: string
       Reduce Operator Tree:
         Group By Operator
           bucketGroup: false
           keys:
                 expr: KEY._col0
                 type: string
-                expr: KEY._col1
+                expr: VALUE._col0
                 type: string
           mode: mergepartial
           outputColumnNames: _col0, _col1
@@ -2755,9 +2752,7 @@ STAGE PLANS:
               key expressions:
                     expr: _col0
                     type: string
-                    expr: _col1
-                    type: string
-              sort order: ++
+              sort order: +
               Map-reduce partition columns:
                     expr: _col0
                     type: string
@@ -3002,9 +2997,7 @@ STAGE PLANS:
               key expressions:
                     expr: key
                     type: string
-                    expr: _col1
-                    type: double
-              sort order: ++
+              sort order: +
               Map-reduce partition columns:
                     expr: key
                     type: string
@@ -3295,20 +3288,21 @@ STAGE PLANS:
               key expressions:
                     expr: key
                     type: string
-                    expr: value
-                    type: string
-              sort order: ++
+              sort order: +
               Map-reduce partition columns:
                     expr: key
                     type: string
               tag: -1
+              value expressions:
+                    expr: value
+                    type: string
       Reduce Operator Tree:
         Group By Operator
           bucketGroup: false
           keys:
                 expr: KEY._col0
                 type: string
-                expr: KEY._col1
+                expr: VALUE._col0
                 type: string
           mode: complete
           outputColumnNames: _col0, _col1
diff --git a/ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out b/ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out
new file mode 100644
index 0000000..999a6d1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out
@@ -0,0 +1,1216 @@
+PREHOOK: query: -- HIVE-2340 deduplicate RS followed by RS
+-- hive.optimize.reducededuplication : whether to use this optimization
+-- hive.optimize.reducededuplication.min.reducer : the deduped RS should have at least this many reducers
+
+create table src9 (key string, value string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- HIVE-2340 deduplicate RS followed by RS
+-- hive.optimize.reducededuplication : whether to use this optimization
+-- hive.optimize.reducededuplication.min.reducer : the deduped RS should have at least this many reducers
+
+create table src9 (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@src9
+PREHOOK: query: load data local inpath '../data/files/kv9.txt' into table src9
+PREHOOK: type: LOAD
+PREHOOK: Output: default@src9
+POSTHOOK: query: load data local inpath '../data/files/kv9.txt' into table src9
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@src9
+PREHOOK: query: -- RS-mGBY-RS-rGBY
+explain select key, value from (select key, value from src9 order by key, value) t group by value, key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- RS-mGBY-RS-rGBY
+explain select key, value from (select key, value from src9 order by key, value) t group by value, key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a
root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ sort order: ++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- mGBY-RS-rGBY-RS
+-- should not be optimized
+explain select key, sum(key) as value from src9 group by key order by value, key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- mGBY-RS-rGBY-RS
+-- should not be optimized
+explain select key, sum(key) as value from src9 group by key order by value, key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: sum(key)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: double
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: sum(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: double
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: double
+ expr: _col0
+ type: string
+ sort order: ++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: double
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: explain select key, value from (select key, value from src9 group by key, value) t order by key desc, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value from (select key, value from src9 group by key, value) t order by key desc, value
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: -+
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: explain select key, value from (select key, value from src9 group by key, value) t order by value, key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value from (select key, value from src9 group by key, value) t order by value, key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col1
+ type: string
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- mGBY-RS-rGBY-mGBY-RS-rGBY
+explain select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4
+PREHOOK: type: QUERY
+POSTHOOK: query: -- mGBY-RS-rGBY-mGBY-RS-rGBY
+explain select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL key) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL key) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL key) k4)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1)) (TOK_SELEXPR (TOK_TABLE_OR_COL k2)) (TOK_SELEXPR (TOK_TABLE_OR_COL k3)) (TOK_SELEXPR (TOK_TABLE_OR_COL k4))) (TOK_GROUPBY (TOK_TABLE_OR_COL k1) (TOK_TABLE_OR_COL k2) (TOK_TABLE_OR_COL k3) (TOK_TABLE_OR_COL k4)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k2)) (TOK_SELEXPR (TOK_TABLE_OR_COL k4)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL k1))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL k3)))) (TOK_GROUPBY (TOK_TABLE_OR_COL k2) (TOK_TABLE_OR_COL k4))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:t:src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: key
+ type: string
+ expr: key
+ type: string
+ expr: key
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ expr: _col3
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ expr: _col3
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: VALUE._col0
+ type: string
+ expr: KEY._col0
+ type: string
+ expr: VALUE._col1
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Group By Operator
+ aggregations:
+ expr: count(_col0)
+ expr: sum(_col2)
+ bucketGroup: false
+ keys:
+ expr: _col1
+ type: string
+ expr: _col3
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ expr: _col3
+ type: double
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select key, value from (select key, value from src9 order by key, value) t group by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from (select key, value from src9 order by key, value) t group by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+128 val_128
+150 val_150
+165 val_165
+193 val_193
+213 val_213
+213 val_214
+200 val_215
+224 val_224
+238 val_238
+238 val_239
+238 val_240
+255 val_255
+265 val_265
+27 val_27
+273 val_273
+278 val_278
+311 val_311
+369 val_369
+401 val_401
+409 val_409
+484 val_484
+66 val_66
+86 val_86
+98 val_98
+PREHOOK: query: select key, sum(key) as value from src9 group by key order by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select key, sum(key) as value from src9 group by key order by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+27 27.0
+66 66.0
+86 86.0
+98 98.0
+128 128.0
+150 150.0
+165 165.0
+193 193.0
+200 200.0
+224 224.0
+255 255.0
+265 265.0
+273 273.0
+278 278.0
+311 311.0
+369 369.0
+401 401.0
+409 409.0
+484 484.0
+213 639.0
+238 714.0
+PREHOOK: query: select key, value from (select key, value from src9 group by key, value) t order by key desc, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from (select key, value from src9 group by key, value) t order by key desc, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+98 val_98
+86 val_86
+66 val_66
+484 val_484
+409 val_409
+401 val_401
+369 val_369
+311 val_311
+278 val_278
+273 val_273
+27 val_27
+265 val_265
+255 val_255
+238 val_238
+238 val_239
+238 val_240
+224 val_224
+213 val_213
+213 val_214
+200 val_215
+193 val_193
+165 val_165
+150 val_150
+128 val_128
+PREHOOK: query: select key, value from (select key, value from src9 group by key, value) t order by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from (select key, value from src9 group by key, value) t order by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+128 val_128
+150 val_150
+165 val_165
+193 val_193
+213 val_213
+213 val_214
+200 val_215
+224 val_224
+238 val_238
+238 val_239
+238 val_240
+255 val_255
+265 val_265
+27 val_27
+273 val_273
+278 val_278
+311 val_311
+369 val_369
+401 val_401
+409 val_409
+484 val_484
+66 val_66
+86 val_86
+98 val_98
+PREHOOK: query: select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+128 128 1 128.0
+150 150 1 150.0
+165 165 1 165.0
+193 193 1 193.0
+200 200 1 200.0
+213 213 1 213.0
+224 224 1 224.0
+238 238 1 238.0
+255 255 1 255.0
+265 265 1 265.0
+27 27 1 27.0
+273 273 1 273.0
+278 278 1 278.0
+311 311 1 311.0
+369 369 1 369.0
+401 401 1 401.0
+409 409 1 409.0
+484 484 1 484.0
+66 66 1 66.0
+86 86 1 86.0
+98 98 1 98.0
+PREHOOK: query: -- RS-RS-GBY
+explain select key, value from (select key, value from src9 order by key, value) t group by value, key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- RS-RS-GBY
+explain select key, value from (select key, value from src9 order by key, value) t group by value, key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ sort order: ++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- RS-GBY-RS
+-- should not be optimized
+explain select key, sum(key) as value from src9 group by key order by value, key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- RS-GBY-RS
+-- should not be optimized
+explain select key, sum(key) as value from src9 group by key order by value, key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: sum(KEY._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: double
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: double
+ expr: _col0
+ type: string
+ sort order: ++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: double
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: explain select key, value from (select key, value from src9 group by key, value) t order by key desc, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value from (select key, value from src9 group by key, value) t order by key desc, value
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ sort order: -+
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: explain select key, value from (select key, value from src9 group by key, value) t order by value, key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value from (select key, value from src9 group by key, value) t order by value, key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Reduce Output Operator
+ key expressions:
+ expr: value
+ type: string
+ expr: key
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col1
+ type: string
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- RS-GBY-RS-GBY
+explain select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4
+PREHOOK: type: QUERY
+POSTHOOK: query: -- RS-GBY-RS-GBY
+explain select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src9))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL key) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL key) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL key) k4)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1)) (TOK_SELEXPR (TOK_TABLE_OR_COL k2)) (TOK_SELEXPR (TOK_TABLE_OR_COL k3)) (TOK_SELEXPR (TOK_TABLE_OR_COL k4))) (TOK_GROUPBY (TOK_TABLE_OR_COL k1) (TOK_TABLE_OR_COL k2) (TOK_TABLE_OR_COL k3) (TOK_TABLE_OR_COL k4)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k2)) (TOK_SELEXPR (TOK_TABLE_OR_COL k4)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL k1))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL k3)))) (TOK_GROUPBY (TOK_TABLE_OR_COL k2) (TOK_TABLE_OR_COL k4))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:t:src9
+ TableScan
+ alias: src9
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: key
+ type: string
+ expr: key
+ type: string
+ expr: key
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ expr: _col3
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ expr: _col3
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: VALUE._col0
+ type: string
+ expr: KEY._col0
+ type: string
+ expr: VALUE._col1
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Group By Operator
+ aggregations:
+ expr: count(_col0)
+ expr: sum(_col2)
+ bucketGroup: false
+ keys:
+ expr: _col1
+ type: string
+ expr: _col3
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ expr: _col3
+ type: double
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select key, value from (select key, value from src9 order by key, value) t group by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from (select key, value from src9 order by key, value) t group by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+128 val_128
+150 val_150
+165 val_165
+193 val_193
+213 val_213
+213 val_214
+200 val_215
+224 val_224
+238 val_238
+238 val_239
+238 val_240
+255 val_255
+265 val_265
+27 val_27
+273 val_273
+278 val_278
+311 val_311
+369 val_369
+401 val_401
+409 val_409
+484 val_484
+66 val_66
+86 val_86
+98 val_98
+PREHOOK: query: select key, sum(key) as value from src9 group by key order by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select key, sum(key) as value from src9 group by key order by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+27 27.0
+66 66.0
+86 86.0
+98 98.0
+128 128.0
+150 150.0
+165 165.0
+193 193.0
+200 200.0
+224 224.0
+255 255.0
+265 265.0
+273 273.0
+278 278.0
+311 311.0
+369 369.0
+401 401.0
+409 409.0
+484 484.0
+213 639.0
+238 714.0
+PREHOOK: query: select key, value from (select key, value from src9 group by key, value) t order by key desc, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from (select key, value from src9 group by key, value) t order by key desc, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+98 val_98
+86 val_86
+66 val_66
+484 val_484
+409 val_409
+401 val_401
+369 val_369
+311 val_311
+278 val_278
+273 val_273
+27 val_27
+265 val_265
+255 val_255
+238 val_238
+238 val_239
+238 val_240
+224 val_224
+213 val_213
+213 val_214
+200 val_215
+193 val_193
+165 val_165
+150 val_150
+128 val_128
+PREHOOK: query: select key, value from (select key, value from src9 group by key, value) t order by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from (select key, value from src9 group by key, value) t order by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+128 val_128
+150 val_150
+165 val_165
+193 val_193
+213 val_213
+213 val_214
+200 val_215
+224 val_224
+238 val_238
+238 val_239
+238 val_240
+255 val_255
+265 val_265
+27 val_27
+273 val_273
+278 val_278
+311 val_311
+369 val_369
+401 val_401
+409 val_409
+484 val_484
+66 val_66
+86 val_86
+98 val_98
+PREHOOK: query: select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src9
+#### A masked pattern was here ####
+POSTHOOK: query: select k2, k4, count(k1), sum(k3) from (select k1, k2, k3, k4 from (select key k1, key k2, key k3, key k4 from src9) t group by k1, k2, k3, k4) t group by k2, k4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src9
+#### A masked pattern was here ####
+128 128 1 128.0
+150 150 1 150.0
+165 165 1 165.0
+193 193 1 193.0
+200 200 1 200.0
+213 213 1 213.0
+224 224 1 224.0
+238 238 1 238.0
+255 255 1 255.0
+265 265 1 265.0
+27 27 1 27.0
+273 273 1 273.0
+278 278 1 278.0
+311 311 1 311.0
+369 369 1 369.0
+401 401 1 401.0
+409 409 1 409.0
+484 484 1 484.0
+66 66 1 66.0
+86 86 1 86.0
+98 98 1 98.0
+PREHOOK: query: drop table src9
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src9
+PREHOOK: Output: default@src9
+POSTHOOK: query: drop table src9
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src9
+POSTHOOK: Output: default@src9
diff --git a/ql/src/test/results/compiler/plan/groupby1.q.xml b/ql/src/test/results/compiler/plan/groupby1.q.xml
index 1360e2e..b557096 100755
--- a/ql/src/test/results/compiler/plan/groupby1.q.xml
+++ b/ql/src/test/results/compiler/plan/groupby1.q.xml
@@ -356,6 +356,9 @@
+
+ true
+
diff --git a/ql/src/test/results/compiler/plan/groupby2.q.xml b/ql/src/test/results/compiler/plan/groupby2.q.xml
index b41c3a4..3f92ccf 100755
--- a/ql/src/test/results/compiler/plan/groupby2.q.xml
+++ b/ql/src/test/results/compiler/plan/groupby2.q.xml
@@ -249,6 +249,9 @@
+
+ true
+
diff --git a/ql/src/test/results/compiler/plan/groupby3.q.xml b/ql/src/test/results/compiler/plan/groupby3.q.xml
index 368198f..942fc80 100644
--- a/ql/src/test/results/compiler/plan/groupby3.q.xml
+++ b/ql/src/test/results/compiler/plan/groupby3.q.xml
@@ -298,6 +298,9 @@
+
+ true
+
diff --git a/ql/src/test/results/compiler/plan/groupby4.q.xml b/ql/src/test/results/compiler/plan/groupby4.q.xml
index 3b52d86..4c09a2a 100644
--- a/ql/src/test/results/compiler/plan/groupby4.q.xml
+++ b/ql/src/test/results/compiler/plan/groupby4.q.xml
@@ -191,6 +191,9 @@
+
+ true
+
diff --git a/ql/src/test/results/compiler/plan/groupby5.q.xml b/ql/src/test/results/compiler/plan/groupby5.q.xml
index 1595dc8..21a5f04 100644
--- a/ql/src/test/results/compiler/plan/groupby5.q.xml
+++ b/ql/src/test/results/compiler/plan/groupby5.q.xml
@@ -209,6 +209,9 @@
+
+ true
+
diff --git a/ql/src/test/results/compiler/plan/groupby6.q.xml b/ql/src/test/results/compiler/plan/groupby6.q.xml
index 620b3ed..a0b881c 100644
--- a/ql/src/test/results/compiler/plan/groupby6.q.xml
+++ b/ql/src/test/results/compiler/plan/groupby6.q.xml
@@ -191,6 +191,9 @@
+
+ true
+
diff --git a/ql/src/test/results/compiler/plan/input20.q.xml b/ql/src/test/results/compiler/plan/input20.q.xml
index 04c19d9..e7c3ee9 100644
--- a/ql/src/test/results/compiler/plan/input20.q.xml
+++ b/ql/src/test/results/compiler/plan/input20.q.xml
@@ -165,7 +165,7 @@
-
+ _col1
@@ -1503,6 +1503,9 @@
+
+
+
diff --git a/ql/src/test/results/compiler/plan/input4.q.xml b/ql/src/test/results/compiler/plan/input4.q.xml
index 4d953c0..41c4669 100755
--- a/ql/src/test/results/compiler/plan/input4.q.xml
+++ b/ql/src/test/results/compiler/plan/input4.q.xml
@@ -316,7 +316,7 @@
-
+ _col1
@@ -1457,6 +1457,9 @@
+
+
+
diff --git a/ql/src/test/results/compiler/plan/input5.q.xml b/ql/src/test/results/compiler/plan/input5.q.xml
index c28cd0f..895879f 100644
--- a/ql/src/test/results/compiler/plan/input5.q.xml
+++ b/ql/src/test/results/compiler/plan/input5.q.xml
@@ -320,7 +320,7 @@
-
+ _col1
@@ -1507,6 +1507,9 @@
+
+
+