diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index f1c3564..561bf3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc; import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.ListSinkDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MuxDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -118,6 +119,8 @@ OrcFileMergeOperator.class)); opvec.add(new OpTuple(CommonMergeJoinDesc.class, CommonMergeJoinOperator.class)); + opvec.add(new OpTuple(ListSinkDesc.class, + ListSinkOperator.class)); } static { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index ba28bc7..b47bc6e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -600,8 +600,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, // revert output cols of SEL(*) to ExprNodeColumnDesc String[] tabcol = rr.reverseLookup(col); ColumnInfo colInfo = rr.get(tabcol[0], tabcol[1]); - ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(colInfo); colList.add(colExpr); outputColNames.add(col); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 906dadf..378f233 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -24,6 +24,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -31,12 +32,16 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -53,6 +58,7 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QB; +import org.apache.hadoop.hive.ql.parse.QBExpr; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -156,7 
+162,7 @@ private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) thr // all we can handle is LimitOperator, FilterOperator SelectOperator and final FS // // for non-aggressive mode (minimal) - // 1. samping is not allowed + // 1. sampling is not allowed // 2. for partitioned table, all filters should be targeted to partition column // 3. SelectOperator should use only simple cast/column access private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, @@ -170,52 +176,74 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, return null; } - Table table = pctx.getTopToTable().get(ts); + // extract table names from id, which consists of "(subquery-alias:)*table-name" + String[] subqIDs = alias.split(":"); + for (int i = 0 ; i < subqIDs.length - 1; i++) { + // for union case, subquery-alias is suffixed with "-subquery[12]" + // for top-level union, subquery-alias is regarded as "null" + String[] subqID = subqIDs[i].split("-"); + if (subqID[0].equals("null")) { + continue; // root alias + } + QBExpr qbexpr = qb.getSubqForAlias(subqID[0]); + for (int j = 1; j < subqID.length; j++) { + if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { + if (subqID[j].equals("subquery1")) { + qbexpr = qbexpr.getQBExpr1(); + } else if (subqID[j].equals("subquery2")) { + qbexpr = qbexpr.getQBExpr2(); + } + } + } + qb = qbexpr.getQB(); + } + String tableName = subqIDs[subqIDs.length - 1]; + Table table = qb.getMetaData().getAliasToTable().get(tableName); if (table == null) { return null; } ReadEntity parent = PlanUtils.getParentViewInfo(alias, pctx.getViewAliasToInput()); if (!table.isPartitioned()) { - return checkOperators(new FetchData(parent, table, splitSample), ts, aggressive, false); + FetchData fetch = new FetchData(ts, parent, table, splitSample); + return checkOperators(fetch, aggressive, false); } boolean bypassFilter = false; if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) { ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts); - bypassFilter = PartitionPruner.onlyContainsPartnCols(table, pruner); - } - if (aggressive || bypassFilter) { - PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts); - if (aggressive || !pruned.hasUnknownPartitions()) { - bypassFilter &= !pruned.hasUnknownPartitions(); - return checkOperators(new FetchData(parent, table, pruned, splitSample, bypassFilter), ts, - aggressive, bypassFilter); + if (PartitionPruner.onlyContainsPartnCols(table, pruner)) { + bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions(); } } - return null; + if (!aggressive && !bypassFilter) { + return null; + } + PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts); + FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter); + return checkOperators(fetch, aggressive, bypassFilter); + } + + private FetchData checkOperators(FetchData fetch, boolean aggressive, boolean bypassFilter) { + if (aggressive) { + return isConvertible(fetch) ?
fetch : null; + } + return checkOperators(fetch, fetch.scanOp, bypassFilter); } - private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggressive, - boolean bypassFilter) { + private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean bypassFilter) { if (ts.getChildOperators().size() != 1) { return null; } Operator op = ts.getChildOperators().get(0); for (; ; op = op.getChildOperators().get(0)) { if (op instanceof SelectOperator) { - if (!aggressive) { - if (!checkExpressions((SelectOperator) op)) { - break; - } + if (!checkExpressions((SelectOperator) op)) { + return null; } continue; } - if (aggressive) { - if (!(op instanceof LimitOperator || op instanceof FilterOperator)) { - break; - } - } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) { + if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) { break; } @@ -225,7 +253,6 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean } if (op instanceof FileSinkOperator) { - fetch.scanOp = ts; fetch.fileSink = op; return fetch; } @@ -235,6 +262,9 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean private boolean checkExpressions(SelectOperator op) { SelectDesc desc = op.getConf(); + if (desc.isSelectStar() || desc.isSelStarNoCompute()) { + return true; + } for (ExprNodeDesc expr : desc.getColList()) { if (!checkExpression(expr)) { return false; @@ -260,22 +290,53 @@ private boolean checkExpression(ExprNodeDesc expr) { return false; } + private boolean isConvertible(FetchData fetch) { + return isConvertible(fetch, fetch.scanOp, new HashSet>()); + } + + private boolean isConvertible(FetchData fetch, Operator operator, Set> traversed) { + if (operator instanceof ReduceSinkOperator || operator instanceof CommonJoinOperator + || operator instanceof ScriptOperator) { + return false; + } + if (!traversed.add(operator)) { + return true; + } + if (operator.getNumChild() == 0) { + if (operator instanceof FileSinkOperator) { + fetch.fileSink = operator; + return true; + } + return false; + } + for (Operator child : operator.getChildOperators()) { + if (!traversed.containsAll(child.getParentOperators())){ + continue; + } + if (!isConvertible(fetch, child, traversed)) { + return false; + } + } + return true; + } + private class FetchData { + // source table scan + private final TableScanOperator scanOp; private final ReadEntity parent; + private final Table table; private final SplitSample splitSample; private final PrunedPartitionList partsList; private final HashSet inputs = new HashSet(); private final boolean onlyPruningFilter; - // source table scan - private TableScanOperator scanOp; - // this is always non-null when conversion is completed private Operator fileSink; - private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { + private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, SplitSample splitSample) { + this.scanOp = scanOp; this.parent = parent; this.table = table; this.partsList = null; @@ -283,8 +344,9 @@ private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { this.onlyPruningFilter = false; } - private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList, + private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, PrunedPartitionList partsList, SplitSample splitSample, boolean bypassFilter) { + this.scanOp = scanOp; this.parent = parent; this.table = table; 
this.partsList = partsList; @@ -302,7 +364,7 @@ public boolean hasOnlyPruningFilter() { private FetchWork convertToWork() throws HiveException { inputs.clear(); if (!table.isPartitioned()) { - inputs.add(new ReadEntity(table, parent, parent == null)); + inputs.add(new ReadEntity(table, parent, !table.isView() && parent == null)); FetchWork work = new FetchWork(table.getPath(), Utilities.getTableDesc(table)); PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc()); work.setSplitSample(splitSample); @@ -395,8 +457,8 @@ private long getFileLength(JobConf conf, Path path, Class } public static ListSinkOperator replaceFSwithLS(Operator fileSink, String nullFormat) { - ListSinkOperator sink = new ListSinkOperator(); - sink.setConf(new ListSinkDesc(nullFormat)); + ListSinkDesc desc = new ListSinkDesc(nullFormat); + ListSinkOperator sink = (ListSinkOperator) OperatorFactory.get(desc); sink.setParentOperators(new ArrayList>()); Operator parent = fileSink.getParentOperators().get(0); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index d79879c..cc61ce1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -209,8 +209,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ArrayList newValueCols = Lists.newArrayList(); Map colExprMap = Maps.newHashMap(); for (ColumnInfo ci : valColInfo) { - newValueCols.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), ci - .getTabAlias(), ci.isHiddenVirtualCol())); + newValueCols.add(new ExprNodeColumnDesc(ci)); colExprMap.put(ci.getInternalName(), newValueCols.get(newValueCols.size() - 1)); } ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder, @@ -476,9 +475,7 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, for (Integer idx : pos) { ColumnInfo ci = colInfos.get(idx); - ExprNodeColumnDesc encd = new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), - ci.getTabAlias(), ci.isHiddenVirtualCol()); - cols.add(encd); + cols.add(new ExprNodeColumnDesc(ci)); } return cols; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java index 406c18e..d10dac7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java @@ -99,8 +99,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (!qualifiedAccess) { colInfo = getColInfo(ctx, null, tableOrCol, expr); // It's a column. 
- return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + return new ExprNodeColumnDesc(colInfo); } else if (hasTableAlias(ctx, tableOrCol, expr)) { return null; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java index 8afe218..6a8915e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java @@ -192,9 +192,7 @@ public static void processSkewJoin(JoinOperator joinOp, String newColName = i + "_VALUE_" + k; // any name, it does not matter. ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false); columnInfos.add(columnInfo); - newValueExpr.add(new ExprNodeColumnDesc( - columnInfo.getType(), columnInfo.getInternalName(), - columnInfo.getTabAlias(), false)); + newValueExpr.add(new ExprNodeColumnDesc(columnInfo)); if (!first) { colNames = colNames + ","; colTypes = colTypes + ","; @@ -216,9 +214,7 @@ public static void processSkewJoin(JoinOperator joinOp, ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory .getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false); columnInfos.add(columnInfo); - newKeyExpr.add(new ExprNodeColumnDesc( - columnInfo.getType(), columnInfo.getInternalName(), - columnInfo.getTabAlias(), false)); + newKeyExpr.add(new ExprNodeColumnDesc(columnInfo)); } newJoinValues.put(alias, newValueExpr); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index b15a46d..13f0a38 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -259,8 +259,15 @@ public boolean isTopLevelSelectStarQuery() { // to find target for fetch task conversion optimizer (not allows subqueries) public boolean isSimpleSelectQuery() { - return qbp.isSimpleSelectQuery() && aliasToSubq.isEmpty() && !isCTAS() && - !qbp.isAnalyzeCommand(); + if (!qbp.isSimpleSelectQuery() || isCTAS() || qbp.isAnalyzeCommand()) { + return false; + } + for (QBExpr qbexpr : aliasToSubq.values()) { + if (!qbexpr.isSimpleSelectQuery()) { + return false; + } + } + return true; } public boolean hasTableSample(String alias) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java index e923bca..36e65da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java @@ -114,4 +114,10 @@ public void print(String msg) { } } + public boolean isSimpleSelectQuery() { + if (qb != null) { + return qb.isSimpleSelectQuery(); + } + return qbexpr1.isSimpleSelectQuery() && qbexpr2.isSimpleSelectQuery(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 02c4be9..3e51188 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -466,10 +466,12 @@ public boolean isTopLevelSimpleSelectStarQuery() { return true; } + // for fast check of possible existence of RS (will be checked again in SimpleFetchOptimizer) public boolean isSimpleSelectQuery() { - if (isSubQ || joinExpr != null || !destToOrderby.isEmpty() || !destToSortby.isEmpty() + 
if (joinExpr != null || !destToOrderby.isEmpty() || !destToSortby.isEmpty() || !destToGroupby.isEmpty() || !destToClusterby.isEmpty() || !destToDistributeby.isEmpty() - || !aliasToLateralViews.isEmpty() || !destToLateralView.isEmpty()) { + || !destRollups.isEmpty() || !destCubes.isEmpty() || !destGroupingSets.isEmpty() + || !destToHaving.isEmpty()) { return false; } @@ -491,6 +493,7 @@ public boolean isSimpleSelectQuery() { } } + // exclude insert queries for (ASTNode v : nameToDest.values()) { if (!(v.getChild(0).getType() == HiveParser.TOK_TMP_FILE)) { return false; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 3afc071..92c4e51 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -4130,9 +4130,7 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr)); } - groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo - .getInternalName(), exprInfo.getTabAlias(), exprInfo - .getIsVirtualCol())); + groupByKeys.add(new ExprNodeColumnDesc(exprInfo)); String field = getColumnInternalName(i); outputColumnNames.add(field); ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false); @@ -6926,9 +6924,7 @@ private Operator genReduceSinkPlanForSortingBucketing(Table tab, Operator input, for (ColumnInfo colInfo : inputRR.getColumnInfos()) { String internalName = getColumnInternalName(i++); outputColumns.add(internalName); - valueCols.add(new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol())); + valueCols.add(new ExprNodeColumnDesc(colInfo)); colExprMap.put(internalName, valueCols .get(valueCols.size() - 1)); } @@ -7057,8 +7053,7 @@ private Operator genReduceSinkPlan(String dest, QB qb, Operator input, ColumnInfo colInfo = columnInfos.get(i); String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName()); - ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo); // backtrack can be null when input is script operator ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input); @@ -7310,8 +7305,7 @@ private Operator genJoinReduceSinkChild(QB qb, ExprNodeDesc[] joinKeys, ColumnInfo colInfo = columns.get(i); String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName()); - ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo); // backtrack can be null when input is script operator ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, child); @@ -8399,12 +8393,9 @@ private Operator insertSelectAllPlanForGroupBy(Operator input) new HashMap(); for (int i = 0; i < columns.size(); i++) { ColumnInfo col = columns.get(i); - colList.add(new ExprNodeColumnDesc(col.getType(), col.getInternalName(), - col.getTabAlias(), col.getIsVirtualCol())); + colList.add(new ExprNodeColumnDesc(col)); columnNames.add(col.getInternalName()); - columnExprMap.put(col.getInternalName(), - new 
ExprNodeColumnDesc(col.getType(), col.getInternalName(), - col.getTabAlias(), col.getIsVirtualCol())); + columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col)); } Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new SelectDesc(colList, columnNames, true), new RowSchema(inputRR @@ -9261,8 +9252,7 @@ private ExprNodeDesc genSamplePredicate(TableSample ts, for (String col : bucketCols) { ColumnInfo ci = rwsch.get(alias, col); // TODO: change type to the one in the table schema - args.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), ci - .getTabAlias(), ci.getIsVirtualCol())); + args.add(new ExprNodeColumnDesc(ci)); } } else { for (ASTNode expr : ts.getExprs()) { @@ -9843,8 +9833,7 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) for (ColumnInfo col : source.getColumnInfos()) { String[] tabCol = source.reverseLookup(col.getInternalName()); lvForwardRR.put(tabCol[0], tabCol[1], col); - ExprNodeDesc colExpr = new ExprNodeColumnDesc(col.getType(), col.getInternalName(), - col.getTabAlias(), false); + ExprNodeDesc colExpr = new ExprNodeColumnDesc(col); colList.add(colExpr); colNames.add(colExpr.getName()); lvfColExprMap.put(col.getInternalName(), colExpr); @@ -9933,8 +9922,7 @@ private void LVmergeRowResolvers(RowResolver source, RowResolver dest, String tableAlias = tableCol[0]; String colAlias = tableCol[1]; dest.put(tableAlias, colAlias, newCol); - colExprMap.put(internalName, new ExprNodeColumnDesc(c.getType(), c.getInternalName(), - c.getTabAlias(), c.getIsVirtualCol())); + colExprMap.put(internalName, new ExprNodeColumnDesc(c)); } } @@ -11993,9 +11981,7 @@ void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef, */ int pos = 0; for (ColumnInfo colInfo : colInfoList) { - ExprNodeDesc valueColExpr = new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + ExprNodeDesc valueColExpr = new ExprNodeColumnDesc(colInfo); valueCols.add(valueColExpr); String internalName = SemanticAnalyzer.getColumnInternalName(pos++); outputColumnNames.add(internalName); @@ -12240,9 +12226,7 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec, RowResolver rsNewRR = new RowResolver(); int pos = 0; for (ColumnInfo colInfo : colInfoList) { - ExprNodeDesc valueColExpr = new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + ExprNodeDesc valueColExpr = new ExprNodeColumnDesc(colInfo); valueCols.add(valueColExpr); String internalName = SemanticAnalyzer.getColumnInternalName(pos++); outputColumnNames.add(internalName); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index e065983..1194b00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -125,9 +125,7 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) // If the current subExpression is pre-calculated, as in Group-By etc. 
ColumnInfo colInfo = input.getExpression(expr); if (colInfo != null) { - desc = new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + desc = new ExprNodeColumnDesc(colInfo); ASTNode source = input.getExpressionSource(expr); if (source != null) { ctx.getUnparseTranslator().addCopyTranslation(expr, source); @@ -500,9 +498,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } // It's a column. - return new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + return new ExprNodeColumnDesc(colInfo); } else { // It's a table alias. // We will process that later in DOT. @@ -534,9 +530,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } else { // It's a column. - ExprNodeColumnDesc exprNodColDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + ExprNodeColumnDesc exprNodColDesc = new ExprNodeColumnDesc(colInfo); exprNodColDesc.setSkewedCol(colInfo.isSkewedCol()); return exprNodColDesc; } @@ -975,8 +969,7 @@ protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode ex ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); return null; } - return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + return new ExprNodeColumnDesc(colInfo); } @Override @@ -1062,16 +1055,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (Map.Entry colMap : columns.entrySet()) { ColumnInfo colInfo = colMap.getValue(); if (!colInfo.getIsVirtualCol()) { - columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), false)); + columnList.addColumn(new ExprNodeColumnDesc(colInfo)); } } } else { // all columns (select *, for example) for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { - columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), false)); + columnList.addColumn(new ExprNodeColumnDesc(colInfo)); } } } @@ -1125,8 +1116,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, RowResolver input = ctx.getInputRR(); for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { - children.add(new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), false)); + children.add(new ExprNodeColumnDesc(colInfo)); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java index 250208e..8535c86 100755 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnDesc.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -56,6 +57,11 @@ public ExprNodeColumnDesc() { } + public ExprNodeColumnDesc(ColumnInfo colInfo) { + this(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), + colInfo.getIsVirtualCol()); + } + public ExprNodeColumnDesc(TypeInfo typeInfo, String column, String tabAlias, boolean 
isPartitionColOrVirtualCol) { super(typeInfo); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 32d84ea..0d56828 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -99,8 +100,8 @@ public FetchWork(List partDir, List partDesc, public void initializeForFetch() { if (source == null) { - sink = new ListSinkOperator(); - sink.setConf(new ListSinkDesc(serializationNullFormat)); + ListSinkDesc desc = new ListSinkDesc(serializationNullFormat); + sink = (ListSinkOperator) OperatorFactory.get(desc); source = sink; } } diff --git ql/src/test/queries/clientpositive/nonmr_fetch.q ql/src/test/queries/clientpositive/nonmr_fetch.q index 2a92d17..2a52888 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch.q +++ ql/src/test/queries/clientpositive/nonmr_fetch.q @@ -9,7 +9,6 @@ select * from src limit 10; explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; --- negative, select expression explain select key from src limit 10; select key from src limit 10; @@ -62,6 +61,16 @@ select * from src TABLESAMPLE (0.25 PERCENT); explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT); select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT); +-- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20; +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20; + +-- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20; +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20; + -- non deterministic func explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1; select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1; @@ -78,8 +87,5 @@ explain create table srcx as select distinct key, value from src; -- negative, analyze explain analyze table src compute statistics; --- negative, subq -explain select a.* from (select * from src) a; - -- negative, join explain select * from src join src src2 on src.key=src2.key; diff --git ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q index b1a7cb5..959212b 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q +++ ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q @@ -3,6 +3,11 @@ set hive.fetch.task.conversion=more; explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select cast(key as int) * 10, upper(value) from src limit 10; +set hive.fetch.task.conversion.threshold=10000; + +explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; +explain select cast(key as int) * 10, upper(value) from src limit 10; + set hive.fetch.task.conversion.threshold=100; -- from 
HIVE-7397, limit + partition pruning filter diff --git ql/src/test/results/clientnegative/udf_assert_true.q.out ql/src/test/results/clientnegative/udf_assert_true.q.out index 819d723..4a5b30d 100644 --- ql/src/test/results/clientnegative/udf_assert_true.q.out +++ ql/src/test/results/clientnegative/udf_assert_true.q.out @@ -21,10 +21,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: assert_true((_col5 > 0)) (type: void) outputColumnNames: _col0 @@ -48,7 +48,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: assert_true((_col5 > 0)) (type: void) outputColumnNames: _col0 @@ -98,10 +98,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: assert_true((_col5 < 2)) (type: void) outputColumnNames: _col0 @@ -125,7 +125,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: assert_true((_col5 < 2)) (type: void) outputColumnNames: _col0 diff --git ql/src/test/results/clientnegative/udf_assert_true2.q.out ql/src/test/results/clientnegative/udf_assert_true2.q.out index 9760d0d..3684a3f 100644 --- ql/src/test/results/clientnegative/udf_assert_true2.q.out +++ ql/src/test/results/clientnegative/udf_assert_true2.q.out @@ -16,10 +16,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (1 + assert_true((_col5 < 2))) (type: double) outputColumnNames: _col0 @@ -43,7 +43,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (1 + assert_true((_col5 < 2))) (type: double) outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/lateral_view.q.out ql/src/test/results/clientpositive/lateral_view.q.out index 66c2968..25ed62f 100644 --- ql/src/test/results/clientpositive/lateral_view.q.out +++ ql/src/test/results/clientpositive/lateral_view.q.out @@ -132,14 +132,14 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE @@ -159,11 +159,11 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE @@ -199,12 +199,12 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Forward - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: int) outputColumnNames: _col5 @@ -259,9 +259,9 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Forward - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: int) outputColumnNames: _col5 @@ -332,12 +332,12 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Forward - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator @@ -390,9 +390,9 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Forward - Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator @@ -519,10 +519,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4 - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int) outputColumnNames: _col0 @@ -546,7 +546,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col4 - Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int) outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/lateral_view_noalias.q.out ql/src/test/results/clientpositive/lateral_view_noalias.q.out index e1445bf..a2415f5 100644 --- ql/src/test/results/clientpositive/lateral_view_noalias.q.out +++ ql/src/test/results/clientpositive/lateral_view_noalias.q.out @@ -5,67 +5,49 @@ POSTHOOK: query: --HIVE-2608 Do not require AS a,b,c part in LATERAL VIEW EXPLAIN SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Lateral View Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select 
Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Join Operator + outputColumnNames: _col5, _col6 + Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + Select Operator + expressions: map('key1':100,'key2':200) (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: map('key1':100,'key2':200) (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Lateral View Join Operator - outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: string), _col6 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 2 - Processor Tree: - ListSink + ListSink PREHOOK: query: SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 PREHOOK: type: QUERY @@ -82,39 +64,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage 
STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: map('key1':100,'key2':200) (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: map('key1':100,'key2':200) (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 PREHOOK: type: QUERY @@ -158,10 +128,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 @@ -182,7 +152,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 @@ -259,10 +229,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 @@ -283,7 +253,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 - 
Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/lateral_view_ppd.q.out ql/src/test/results/clientpositive/lateral_view_ppd.q.out index 1220030..b186192 100644 --- ql/src/test/results/clientpositive/lateral_view_ppd.q.out +++ ql/src/test/results/clientpositive/lateral_view_ppd.q.out @@ -175,23 +175,44 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ds = '2008-04-08') and (hr = '12')) (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Lateral View Forward - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col1, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col7 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 12 + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: array(1,2,3) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + function name: explode Lateral View Join Operator outputColumnNames: _col1, _col7 - Statistics: Num rows: 4000 Data size: 42496 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col7 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4000 Data size: 42496 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 12 Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -202,30 +223,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: array(1,2,3) (type: array) - outputColumnNames: _col0 - 
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col1, _col7 - Statistics: Num rows: 4000 Data size: 42496 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col7 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4000 Data size: 42496 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 12 - Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -236,18 +233,12 @@ STAGE PLANS: PREHOOK: query: SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol) a WHERE ds='2008-04-08' AND hr="12" LIMIT 12 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### val_238 1 val_238 2 diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index c6a4318..beb30a4 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -86,11 +86,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 278 val_278 2008-04-08 11 98 val_98 2008-04-08 11 484 val_484 2008-04-08 11 -PREHOOK: query: -- negative, select expression -explain select key from src limit 10 +PREHOOK: query: explain select key from src limit 10 PREHOOK: type: QUERY -POSTHOOK: query: -- negative, select expression -explain select key from src limit 10 +POSTHOOK: query: explain select key from src limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage @@ -787,6 +785,150 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 86 val_86 2008-04-09 11 12 238 val_238 2008-04-09 12 0 86 val_86 2008-04-09 12 12 +PREHOOK: query: -- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
((key > 200) and (key < 250)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_238 238 +val_224 224 +val_213 213 +val_209 209 +val_219 219 +val_237 237 +val_207 207 +val_208 208 +val_247 247 +val_203 203 +val_205 205 +val_221 221 +val_208 208 +val_239 239 +val_213 213 +val_216 216 +val_221 221 +val_241 241 +val_230 230 +val_217 217 +PREHOOK: query: -- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + Select Operator + expressions: array(key,value) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select 
key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +238 238 +238 val_238 +86 86 +86 val_86 +311 311 +311 val_311 +27 27 +27 val_27 +165 165 +165 val_165 +409 409 +409 val_409 +255 255 +255 val_255 +278 278 +278 val_278 +98 98 +98 val_98 +484 484 +484 val_484 PREHOOK: query: -- non deterministic func explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 PREHOOK: type: QUERY @@ -1029,29 +1171,6 @@ STAGE PLANS: Stage: Stage-1 Stats-Aggr Operator -PREHOOK: query: -- negative, subq -explain select a.* from (select * from src) a -PREHOOK: type: QUERY -POSTHOOK: query: -- negative, subq -explain select a.* from (select * from src) a -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - ListSink - PREHOOK: query: -- negative, join explain select * from src join src src2 on src.key=src2.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index cb0d332..d7bd42b 100644 --- ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -46,6 +46,54 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE ListSink +PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: 
query: -- from HIVE-7397, limit + partition pruning filter explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/select_dummy_source.q.out ql/src/test/results/clientpositive/select_dummy_source.q.out index 08311f0..b1f0939 100644 --- ql/src/test/results/clientpositive/select_dummy_source.q.out +++ ql/src/test/results/clientpositive/select_dummy_source.q.out @@ -190,37 +190,25 @@ POSTHOOK: query: explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out index c45f0db..4228cec 100644 --- ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out +++ ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out @@ -1815,9 +1815,9 @@ STAGE PLANS: value expressions: key (type: string) auto parallelism: true Path -> Alias: - -mr-10003default.src{} [s2] + -mr-10002default.src{} [s2] Path -> Partition: - -mr-10003default.src{} + -mr-10002default.src{} Partition base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1862,7 +1862,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10003default.src{} [s2] + -mr-10002default.src{} [s2] Map 3 Map Operator Tree: TableScan @@ -1882,9 +1882,9 @@ STAGE PLANS: value expressions: key (type: string) auto parallelism: true Path -> Alias: - -mr-10002default.src{} [s1] + -mr-10003default.src{} [s1] Path -> Partition: - -mr-10002default.src{} + -mr-10003default.src{} Partition base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1929,7 +1929,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10002default.src{} [s1] + -mr-10003default.src{} [s1] Reducer 2 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/tez/select_dummy_source.q.out 
ql/src/test/results/clientpositive/tez/select_dummy_source.q.out index 6f08083..fa99b76 100644 --- ql/src/test/results/clientpositive/tez/select_dummy_source.q.out +++ ql/src/test/results/clientpositive/tez/select_dummy_source.q.out @@ -71,40 +71,22 @@ explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY @@ -185,40 +167,22 @@ POSTHOOK: query: explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_explode.q.out ql/src/test/results/clientpositive/udf_explode.q.out index 301b1b7..b5fd7a2 100644 --- ql/src/test/results/clientpositive/udf_explode.q.out +++ ql/src/test/results/clientpositive/udf_explode.q.out @@ -39,101 +39,26 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - 
alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(1,2,3) (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns col - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(1,2,3) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src tablesample (1 rows)) a GROUP BY a.myCol PREHOOK: type: QUERY @@ -383,101 +308,26 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a 
root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: map(1:'one',2:'two',3:'three') (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns key,value - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: map(1:'one',2:'two',3:'three') (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src tablesample (1 rows)) a 
GROUP BY a.key, a.val PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_inline.q.out ql/src/test/results/clientpositive/udf_inline.q.out index a9cde60..45bd463 100644 --- ql/src/test/results/clientpositive/udf_inline.q.out +++ ql/src/test/results/clientpositive/udf_inline.q.out @@ -20,39 +20,27 @@ POSTHOOK: query: explain SELECT inline( ) as (id, text) FROM SRC limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT inline( ARRAY( diff --git ql/src/test/results/clientpositive/udtf_explode.q.out ql/src/test/results/clientpositive/udtf_explode.q.out index 6213746..e1fcdee 100644 --- ql/src/test/results/clientpositive/udtf_explode.q.out +++ ql/src/test/results/clientpositive/udtf_explode.q.out @@ -38,103 +38,28 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(1,2,3) (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns col - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: 3 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(1,2,3) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol PREHOOK: type: QUERY @@ -419,39 +344,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: map(1:'one',2:'two',3:'three') (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - 
Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 3 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: map(1:'one',2:'two',3:'three') (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udtf_stack.q.out ql/src/test/results/clientpositive/udtf_stack.q.out index 43f0a76..80edb65 100644 --- ql/src/test/results/clientpositive/udtf_stack.q.out +++ ql/src/test/results/clientpositive/udtf_stack.q.out @@ -21,10 +21,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 111000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 245000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 @@ -48,7 +48,7 @@ STAGE PLANS: function name: stack Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 111000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 245000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 @@ -88,10 +88,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 135000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 269000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 @@ -115,7 +115,7 @@ STAGE PLANS: function name: stack Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 135000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 269000 Basic 
stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/union26.q.out ql/src/test/results/clientpositive/union26.q.out index cdc558a..4f9e0d4 100644 --- ql/src/test/results/clientpositive/union26.q.out +++ ql/src/test/results/clientpositive/union26.q.out @@ -103,91 +103,88 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ds = '2008-04-08') and (hr = '11')) (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Lateral View Forward - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Select Operator + expressions: array(1,2,3) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 4000 Data size: 42496 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4000 Data size: 42496 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 
13545 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Select Operator - expressions: array(1,2,3) (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col7 - Statistics: Num rows: 4000 Data size: 42496 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4000 Data size: 42496 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) TableScan Union - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4275 Data size: 45417 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -195,14 +192,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2137 Data size: 22703 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 637 Data size: 6767 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: bigint), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2137 Data size: 22703 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 637 Data size: 6767 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2137 Data size: 22703 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 637 Data size: 6767 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -238,8 +235,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: SELECT count(1) as counts, @@ -265,8 +260,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 10 100 val_100 10 103 val_103 @@ -601,8 +594,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: SELECT count(1) as counts, @@ -628,8 +619,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 10 100 val_100 10 103 val_103 @@ -964,8 +953,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: SELECT count(1) as counts, @@ -991,8 +978,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 10 100 val_100 10 103 val_103
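
The plan changes in these .q.out files share one shape: queries whose operator tree contains only scan, select, filter, limit, UDTF and lateral-view operators now finish in a Stage-0 Fetch Operator ending in ListSink, while queries that need a shuffle, a join, or a file sink keep a real MapReduce/Tez stage. Below is a minimal, self-contained Java sketch of that convertibility check as an idea only; the operator names, the two rule sets, and the isConvertible helper are illustrative assumptions for this sketch and are not the SimpleFetchOptimizer code touched by this patch.

```java
// Toy model of a fetch-task convertibility check: walk an operator chain and
// report whether every operator is safe to evaluate inside a single fetch task.
// The operator vocabulary and rule sets here are assumptions for illustration,
// not Hive's actual SimpleFetchOptimizer logic.
import java.util.Arrays;
import java.util.List;
import java.util.Set;

public class FetchConvertibilitySketch {

    // Operators assumed safe to run client-side in a fetch task.
    private static final Set<String> FETCH_SAFE = Set.of(
        "TABLESCAN", "SELECT", "FILTER", "LIMIT",
        "UDTF", "LATERALVIEWFORWARD", "LATERALVIEWJOIN", "LISTSINK");

    // Operators assumed to force a full execution stage (shuffle, join, etc.).
    private static final Set<String> BLOCKING = Set.of(
        "REDUCESINK", "JOIN", "MAPJOIN", "GROUPBY", "SCRIPT", "FILESINK");

    /** Returns true only if every operator in the chain is fetch-safe. */
    static boolean isConvertible(List<String> operatorChain) {
        for (String op : operatorChain) {
            String name = op.toUpperCase();
            if (BLOCKING.contains(name)) {
                return false;          // needs a real MR/Tez stage
            }
            if (!FETCH_SAFE.contains(name)) {
                return false;          // unknown operator: stay conservative
            }
        }
        return true;
    }

    public static void main(String[] args) {
        // Shape of the UDTF plans above: TableScan-Select-UDTF-Limit-ListSink.
        List<String> udtfPlan = Arrays.asList(
            "TableScan", "Select", "UDTF", "Limit", "ListSink");
        // Shape of a join query, which keeps a regular execution stage.
        List<String> joinPlan = Arrays.asList(
            "TableScan", "ReduceSink", "Join", "FileSink");

        System.out.println("UDTF plan fetch-only? " + isConvertible(udtfPlan)); // true
        System.out.println("join plan fetch-only? " + isConvertible(joinPlan)); // false
    }
}
```

The conservative "unknown operator" branch is the important design point in a check like this: it is safer for a fetch optimizer to fall back to a regular stage than to fetch-convert a plan it cannot reason about.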