diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index f1c3564..561bf3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc; import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.ListSinkDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MuxDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -118,6 +119,8 @@ OrcFileMergeOperator.class)); opvec.add(new OpTuple(CommonMergeJoinDesc.class, CommonMergeJoinOperator.class)); + opvec.add(new OpTuple(ListSinkDesc.class, + ListSinkOperator.class)); } static { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 24300d1..8207599 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import java.util.LinkedHashSet; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,12 +33,16 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -158,7 +163,7 @@ private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) thr // all we can handle is LimitOperator, FilterOperator SelectOperator and final FS // // for non-aggressive mode (minimal) - // 1. samping is not allowed + // 1. sampling is not allowed // 2. for partitioned table, all filters should be targeted to partition column // 3. SelectOperator should use only simple cast/column access private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, @@ -171,53 +176,52 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, if (!aggressive && qb.hasTableSample(alias)) { return null; } - Table table = pctx.getTopToTable().get(ts); if (table == null) { return null; } ReadEntity parent = PlanUtils.getParentViewInfo(alias, pctx.getViewAliasToInput()); if (!table.isPartitioned()) { - return checkOperators(new FetchData(parent, table, splitSample), ts, aggressive, false); + FetchData fetch = new FetchData(ts, parent, table, splitSample); + return checkOperators(fetch, aggressive, false); } boolean bypassFilter = false; if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) { ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts); - bypassFilter = PartitionPruner.onlyContainsPartnCols(table, pruner); - } - if (aggressive || bypassFilter) { - PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts); - if (aggressive || !pruned.hasUnknownPartitions()) { - bypassFilter &= !pruned.hasUnknownPartitions(); - return checkOperators(new FetchData(parent, table, pruned, splitSample, bypassFilter), ts, - aggressive, bypassFilter); + if (PartitionPruner.onlyContainsPartnCols(table, pruner)) { + bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions(); } } - return null; + if (!aggressive && !bypassFilter) { + return null; + } + PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts); + FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter); + return checkOperators(fetch, aggressive, bypassFilter); } - private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggressive, - boolean bypassFilter) { + private FetchData checkOperators(FetchData fetch, boolean aggressive, boolean bypassFilter) { + if (aggressive) { + return isConvertible(fetch) ? fetch : null; + } + return checkOperators(fetch, fetch.scanOp, bypassFilter); + } + + private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean bypassFilter) { if (ts.getChildOperators().size() != 1) { return null; } Operator op = ts.getChildOperators().get(0); for (; ; op = op.getChildOperators().get(0)) { if (op instanceof SelectOperator) { - if (!aggressive) { - if (!checkExpressions((SelectOperator) op)) { - break; - } + if (!checkExpressions((SelectOperator) op)) { + return null; } continue; } - if (aggressive) { - if (!(op instanceof LimitOperator || op instanceof FilterOperator)) { - break; - } - } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) { + if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) { break; } @@ -227,7 +231,6 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean } if (op instanceof FileSinkOperator) { - fetch.scanOp = ts; fetch.fileSink = op; return fetch; } @@ -237,6 +240,9 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean private boolean checkExpressions(SelectOperator op) { SelectDesc desc = op.getConf(); + if (desc.isSelectStar() || desc.isSelStarNoCompute()) { + return true; + } for (ExprNodeDesc expr : desc.getColList()) { if (!checkExpression(expr)) { return false; @@ -264,22 +270,53 @@ private boolean checkExpression(ExprNodeDesc expr) { return false; } + private boolean isConvertible(FetchData fetch) { + return isConvertible(fetch, fetch.scanOp, new HashSet>()); + } + + private boolean isConvertible(FetchData fetch, Operator operator, Set> traversed) { + if (operator instanceof ReduceSinkOperator || operator instanceof CommonJoinOperator + || operator instanceof ScriptOperator) { + return false; + } + if (!traversed.add(operator)) { + return true; + } + if (operator.getNumChild() == 0) { + if (operator instanceof FileSinkOperator) { + fetch.fileSink = operator; + return true; + } + return false; + } + for (Operator child : operator.getChildOperators()) { + if (!traversed.containsAll(child.getParentOperators())){ + continue; + } + if (!isConvertible(fetch, child, traversed)) { + return false; + } + } + return true; + } + private class FetchData { + // source table scan + private final TableScanOperator scanOp; private final ReadEntity parent; + private final Table table; private final SplitSample splitSample; private final PrunedPartitionList partsList; - private final LinkedHashSet inputs = new LinkedHashSet(); + private final Set inputs = new LinkedHashSet(); private final boolean onlyPruningFilter; - // source table scan - private TableScanOperator scanOp; - // this is always non-null when conversion is completed private Operator fileSink; - private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { + private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, SplitSample splitSample) { + this.scanOp = scanOp; this.parent = parent; this.table = table; this.partsList = null; @@ -287,8 +324,9 @@ private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { this.onlyPruningFilter = false; } - private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList, + private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, PrunedPartitionList partsList, SplitSample splitSample, boolean bypassFilter) { + this.scanOp = scanOp; this.parent = parent; this.table = table; this.partsList = partsList; @@ -306,7 +344,7 @@ public boolean hasOnlyPruningFilter() { private FetchWork convertToWork() throws HiveException { inputs.clear(); if (!table.isPartitioned()) { - inputs.add(new ReadEntity(table, parent, parent == null)); + inputs.add(new ReadEntity(table, parent, !table.isView() && parent == null)); FetchWork work = new FetchWork(table.getPath(), Utilities.getTableDesc(table)); PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc()); work.setSplitSample(splitSample); @@ -399,8 +437,8 @@ private long getFileLength(JobConf conf, Path path, Class } public static ListSinkOperator replaceFSwithLS(Operator fileSink, String nullFormat) { - ListSinkOperator sink = new ListSinkOperator(); - sink.setConf(new ListSinkDesc(nullFormat)); + ListSinkDesc desc = new ListSinkDesc(nullFormat); + ListSinkOperator sink = (ListSinkOperator) OperatorFactory.get(desc); sink.setParentOperators(new ArrayList>()); Operator parent = fileSink.getParentOperators().get(0); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index 0497e5a..cf6941c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -262,8 +262,15 @@ public boolean isTopLevelSelectStarQuery() { // to find target for fetch task conversion optimizer (not allows subqueries) public boolean isSimpleSelectQuery() { - return qbp.isSimpleSelectQuery() && aliasToSubq.isEmpty() && !isCTAS() && - !qbp.isAnalyzeCommand(); + if (!qbp.isSimpleSelectQuery() || isCTAS() || qbp.isAnalyzeCommand()) { + return false; + } + for (QBExpr qbexpr : aliasToSubq.values()) { + if (!qbexpr.isSimpleSelectQuery()) { + return false; + } + } + return true; } public boolean hasTableSample(String alias) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java index e923bca..36e65da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java @@ -114,4 +114,10 @@ public void print(String msg) { } } + public boolean isSimpleSelectQuery() { + if (qb != null) { + return qb.isSimpleSelectQuery(); + } + return qbexpr1.isSimpleSelectQuery() && qbexpr2.isSimpleSelectQuery(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 02c4be9..3e51188 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -466,10 +466,12 @@ public boolean isTopLevelSimpleSelectStarQuery() { return true; } + // for fast check of possible existence of RS (will be checked again in SimpleFetchOptimizer) public boolean isSimpleSelectQuery() { - if (isSubQ || joinExpr != null || !destToOrderby.isEmpty() || !destToSortby.isEmpty() + if (joinExpr != null || !destToOrderby.isEmpty() || !destToSortby.isEmpty() || !destToGroupby.isEmpty() || !destToClusterby.isEmpty() || !destToDistributeby.isEmpty() - || !aliasToLateralViews.isEmpty() || !destToLateralView.isEmpty()) { + || !destRollups.isEmpty() || !destCubes.isEmpty() || !destGroupingSets.isEmpty() + || !destToHaving.isEmpty()) { return false; } @@ -491,6 +493,7 @@ public boolean isSimpleSelectQuery() { } } + // exclude insert queries for (ASTNode v : nameToDest.values()) { if (!(v.getChild(0).getType() == HiveParser.TOK_TMP_FILE)) { return false; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index bd610d9..3a3d639 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -128,9 +128,7 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) // If the current subExpression is pre-calculated, as in Group-By etc. ColumnInfo colInfo = input.getExpression(expr); if (colInfo != null) { - desc = new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + desc = new ExprNodeColumnDesc(colInfo); ASTNode source = input.getExpressionSource(expr); if (source != null) { ctx.getUnparseTranslator().addCopyTranslation(expr, source); @@ -513,9 +511,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } // It's a column. - return new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + return new ExprNodeColumnDesc(colInfo); } else { // It's a table alias. // We will process that later in DOT. @@ -547,9 +543,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } else { // It's a column. - ExprNodeColumnDesc exprNodColDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + ExprNodeColumnDesc exprNodColDesc = new ExprNodeColumnDesc(colInfo); exprNodColDesc.setSkewedCol(colInfo.isSkewedCol()); return exprNodColDesc; } @@ -993,8 +987,7 @@ protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode ex ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); return null; } - return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), - colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + return new ExprNodeColumnDesc(colInfo); } @Override @@ -1080,16 +1073,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (Map.Entry colMap : columns.entrySet()) { ColumnInfo colInfo = colMap.getValue(); if (!colInfo.getIsVirtualCol()) { - columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), false)); + columnList.addColumn(new ExprNodeColumnDesc(colInfo)); } } } else { // all columns (select *, for example) for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { - columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), false)); + columnList.addColumn(new ExprNodeColumnDesc(colInfo)); } } } @@ -1142,8 +1133,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, RowResolver input = ctx.getInputRR(); for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { - children.add(new ExprNodeColumnDesc(colInfo.getType(), - colInfo.getInternalName(), colInfo.getTabAlias(), false)); + children.add(new ExprNodeColumnDesc(colInfo)); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 32d84ea..0d56828 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -99,8 +100,8 @@ public FetchWork(List partDir, List partDesc, public void initializeForFetch() { if (source == null) { - sink = new ListSinkOperator(); - sink.setConf(new ListSinkDesc(serializationNullFormat)); + ListSinkDesc desc = new ListSinkDesc(serializationNullFormat); + sink = (ListSinkOperator) OperatorFactory.get(desc); source = sink; } } diff --git ql/src/test/queries/clientpositive/nonmr_fetch.q ql/src/test/queries/clientpositive/nonmr_fetch.q index 2a92d17..2a52888 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch.q +++ ql/src/test/queries/clientpositive/nonmr_fetch.q @@ -9,7 +9,6 @@ select * from src limit 10; explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; --- negative, select expression explain select key from src limit 10; select key from src limit 10; @@ -62,6 +61,16 @@ select * from src TABLESAMPLE (0.25 PERCENT); explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT); select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT); +-- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20; +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20; + +-- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20; +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20; + -- non deterministic func explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1; select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1; @@ -78,8 +87,5 @@ explain create table srcx as select distinct key, value from src; -- negative, analyze explain analyze table src compute statistics; --- negative, subq -explain select a.* from (select * from src) a; - -- negative, join explain select * from src join src src2 on src.key=src2.key; diff --git ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q index b1a7cb5..959212b 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q +++ ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q @@ -3,6 +3,11 @@ set hive.fetch.task.conversion=more; explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select cast(key as int) * 10, upper(value) from src limit 10; +set hive.fetch.task.conversion.threshold=10000; + +explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; +explain select cast(key as int) * 10, upper(value) from src limit 10; + set hive.fetch.task.conversion.threshold=100; -- from HIVE-7397, limit + partition pruning filter diff --git ql/src/test/results/clientpositive/lateral_view_noalias.q.out ql/src/test/results/clientpositive/lateral_view_noalias.q.out index c73697a..5eeee01 100644 --- ql/src/test/results/clientpositive/lateral_view_noalias.q.out +++ ql/src/test/results/clientpositive/lateral_view_noalias.q.out @@ -5,20 +5,38 @@ POSTHOOK: query: --HIVE-2608 Do not require AS a,b,c part in LATERAL VIEW EXPLAIN SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Lateral View Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Lateral View Join Operator + outputColumnNames: _col5, _col6 + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + Select Operator + expressions: map('key1':100,'key2':200) (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE @@ -29,43 +47,7 @@ STAGE PLANS: Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: map('key1':100,'key2':200) (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Lateral View Join Operator - outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: string), _col6 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 2 - Processor Tree: - ListSink + ListSink PREHOOK: query: SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 PREHOOK: type: QUERY @@ -82,39 +64,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: map('key1':100,'key2':200) (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: map('key1':100,'key2':200) (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index 2df90c0..7a8ee05 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -86,11 +86,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 278 val_278 2008-04-08 11 98 val_98 2008-04-08 11 484 val_484 2008-04-08 11 -PREHOOK: query: -- negative, select expression -explain select key from src limit 10 +PREHOOK: query: explain select key from src limit 10 PREHOOK: type: QUERY -POSTHOOK: query: -- negative, select expression -explain select key from src limit 10 +POSTHOOK: query: explain select key from src limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage @@ -775,6 +773,150 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 86 val_86 2008-04-09 11 12 238 val_238 2008-04-09 12 0 86 val_86 2008-04-09 12 12 +PREHOOK: query: -- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(key) > 200.0) and (UDFToDouble(key) < 250.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_238 238 +val_224 224 +val_213 213 +val_209 209 +val_219 219 +val_237 237 +val_207 207 +val_208 208 +val_247 247 +val_203 203 +val_205 205 +val_221 221 +val_208 208 +val_239 239 +val_213 213 +val_216 216 +val_221 221 +val_241 241 +val_230 230 +val_217 217 +PREHOOK: query: -- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + Select Operator + expressions: array(key,value) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +238 238 +238 val_238 +86 86 +86 val_86 +311 311 +311 val_311 +27 27 +27 val_27 +165 165 +165 val_165 +409 409 +409 val_409 +255 255 +255 val_255 +278 278 +278 val_278 +98 98 +98 val_98 +484 484 +484 val_484 PREHOOK: query: -- non deterministic func explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 PREHOOK: type: QUERY @@ -1017,29 +1159,6 @@ STAGE PLANS: Stage: Stage-1 Stats-Aggr Operator -PREHOOK: query: -- negative, subq -explain select a.* from (select * from src) a -PREHOOK: type: QUERY -POSTHOOK: query: -- negative, subq -explain select a.* from (select * from src) a -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - ListSink - PREHOOK: query: -- negative, join explain select * from src join src src2 on src.key=src2.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index cb0d332..d7bd42b 100644 --- ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -46,6 +46,54 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE ListSink +PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: query: -- from HIVE-7397, limit + partition pruning filter explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/select_dummy_source.q.out ql/src/test/results/clientpositive/select_dummy_source.q.out index 08311f0..b1f0939 100644 --- ql/src/test/results/clientpositive/select_dummy_source.q.out +++ ql/src/test/results/clientpositive/select_dummy_source.q.out @@ -190,37 +190,25 @@ POSTHOOK: query: explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/select_dummy_source.q.out ql/src/test/results/clientpositive/tez/select_dummy_source.q.out index 6f08083..fa99b76 100644 --- ql/src/test/results/clientpositive/tez/select_dummy_source.q.out +++ ql/src/test/results/clientpositive/tez/select_dummy_source.q.out @@ -71,40 +71,22 @@ explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY @@ -185,40 +167,22 @@ POSTHOOK: query: explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array('a','b') (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_inline.q.out ql/src/test/results/clientpositive/udf_inline.q.out index a9cde60..45bd463 100644 --- ql/src/test/results/clientpositive/udf_inline.q.out +++ ql/src/test/results/clientpositive/udf_inline.q.out @@ -20,39 +20,27 @@ POSTHOOK: query: explain SELECT inline( ) as (id, text) FROM SRC limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT inline( ARRAY(