diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index f1c3564..561bf3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc; import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.ListSinkDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MuxDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -118,6 +119,8 @@ OrcFileMergeOperator.class)); opvec.add(new OpTuple<CommonMergeJoinDesc>(CommonMergeJoinDesc.class, CommonMergeJoinOperator.class)); + opvec.add(new OpTuple<ListSinkDesc>(ListSinkDesc.class, + ListSinkOperator.class)); } static { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 906dadf..378f233 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -24,6 +24,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -31,12 +32,16 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -53,6 +58,7 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QB; +import org.apache.hadoop.hive.ql.parse.QBExpr; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -156,7 +162,7 @@ private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) thr // all we can handle is LimitOperator, FilterOperator SelectOperator and final FS // // for non-aggressive mode (minimal) - // 1. samping is not allowed + // 1. sampling is not allowed // 2. for partitioned table, all filters should be targeted to partition column // 3. 
SelectOperator should use only simple cast/column access private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, @@ -170,52 +176,74 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, return null; } - Table table = pctx.getTopToTable().get(ts); + // extract table names from the id, which consists of "(subquery-alias:)*table-name" + String[] subqIDs = alias.split(":"); + for (int i = 0; i < subqIDs.length - 1; i++) { + // for the union case, "-subquery[12]" is appended to the subquery-alias + // for a top-level union, the subquery-alias is "null" + String[] subqID = subqIDs[i].split("-"); + if (subqID[0].equals("null")) { + continue; // root alias + } + QBExpr qbexpr = qb.getSubqForAlias(subqID[0]); + for (int j = 1; j < subqID.length; j++) { + if (qbexpr.getOpcode() == QBExpr.Opcode.UNION) { + if (subqID[j].equals("subquery1")) { + qbexpr = qbexpr.getQBExpr1(); + } else if (subqID[j].equals("subquery2")) { + qbexpr = qbexpr.getQBExpr2(); + } + } + } + qb = qbexpr.getQB(); + } + String tableName = subqIDs[subqIDs.length - 1]; + Table table = qb.getMetaData().getAliasToTable().get(tableName); if (table == null) { return null; } ReadEntity parent = PlanUtils.getParentViewInfo(alias, pctx.getViewAliasToInput()); if (!table.isPartitioned()) { - return checkOperators(new FetchData(parent, table, splitSample), ts, aggressive, false); + FetchData fetch = new FetchData(ts, parent, table, splitSample); + return checkOperators(fetch, aggressive, false); } boolean bypassFilter = false; if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) { ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts); - bypassFilter = PartitionPruner.onlyContainsPartnCols(table, pruner); - } - if (aggressive || bypassFilter) { - PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts); - if (aggressive || !pruned.hasUnknownPartitions()) { - bypassFilter &= !pruned.hasUnknownPartitions(); - return checkOperators(new FetchData(parent, table, pruned, splitSample, bypassFilter), ts, - aggressive, bypassFilter); + if (PartitionPruner.onlyContainsPartnCols(table, pruner)) { + bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions(); } } - return null; + if (!aggressive && !bypassFilter) { + return null; + } + PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts); + FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter); + return checkOperators(fetch, aggressive, bypassFilter); + } + + private FetchData checkOperators(FetchData fetch, boolean aggressive, boolean bypassFilter) { + if (aggressive) { + return isConvertible(fetch) ? 
fetch : null; + } + return checkOperators(fetch, fetch.scanOp, bypassFilter); } - private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggressive, - boolean bypassFilter) { + private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean bypassFilter) { if (ts.getChildOperators().size() != 1) { return null; } Operator<?> op = ts.getChildOperators().get(0); for (; ; op = op.getChildOperators().get(0)) { if (op instanceof SelectOperator) { - if (!aggressive) { - if (!checkExpressions((SelectOperator) op)) { - break; - } + if (!checkExpressions((SelectOperator) op)) { + return null; } continue; } - if (aggressive) { - if (!(op instanceof LimitOperator || op instanceof FilterOperator)) { - break; - } - } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) { + if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) { break; } @@ -225,7 +253,6 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean } if (op instanceof FileSinkOperator) { - fetch.scanOp = ts; fetch.fileSink = op; return fetch; } @@ -235,6 +262,9 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean private boolean checkExpressions(SelectOperator op) { SelectDesc desc = op.getConf(); + if (desc.isSelectStar() || desc.isSelStarNoCompute()) { + return true; + } for (ExprNodeDesc expr : desc.getColList()) { if (!checkExpression(expr)) { return false; @@ -260,22 +290,53 @@ private boolean checkExpression(ExprNodeDesc expr) { return false; } + private boolean isConvertible(FetchData fetch) { + return isConvertible(fetch, fetch.scanOp, new HashSet<Operator<?>>()); + } + + private boolean isConvertible(FetchData fetch, Operator<?> operator, Set<Operator<?>> traversed) { + if (operator instanceof ReduceSinkOperator || operator instanceof CommonJoinOperator + || operator instanceof ScriptOperator) { + return false; + } + if (!traversed.add(operator)) { + return true; + } + if (operator.getNumChild() == 0) { + if (operator instanceof FileSinkOperator) { + fetch.fileSink = operator; + return true; + } + return false; + } + for (Operator<?> child : operator.getChildOperators()) { + if (!traversed.containsAll(child.getParentOperators())) { + continue; + } + if (!isConvertible(fetch, child, traversed)) { + return false; + } + } + return true; + } + private class FetchData { + // source table scan + private final TableScanOperator scanOp; private final ReadEntity parent; + private final Table table; private final SplitSample splitSample; private final PrunedPartitionList partsList; private final HashSet<ReadEntity> inputs = new HashSet<ReadEntity>(); private final boolean onlyPruningFilter; - // source table scan - private TableScanOperator scanOp; - // this is always non-null when conversion is completed private Operator<?> fileSink; - private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { + private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, SplitSample splitSample) { + this.scanOp = scanOp; this.parent = parent; this.table = table; this.partsList = null; @@ -283,8 +344,9 @@ private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { this.onlyPruningFilter = false; } - private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList, + private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, PrunedPartitionList partsList, SplitSample splitSample, boolean bypassFilter) { + this.scanOp = scanOp; this.parent = parent; this.table = table; 
this.partsList = partsList; @@ -302,7 +364,7 @@ public boolean hasOnlyPruningFilter() { private FetchWork convertToWork() throws HiveException { inputs.clear(); if (!table.isPartitioned()) { - inputs.add(new ReadEntity(table, parent, parent == null)); + inputs.add(new ReadEntity(table, parent, !table.isView() && parent == null)); FetchWork work = new FetchWork(table.getPath(), Utilities.getTableDesc(table)); PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc()); work.setSplitSample(splitSample); @@ -395,8 +457,8 @@ private long getFileLength(JobConf conf, Path path, Class<? extends InputFormat> clazz) } public static ListSinkOperator replaceFSwithLS(Operator fileSink, String nullFormat) { - ListSinkOperator sink = new ListSinkOperator(); - sink.setConf(new ListSinkDesc(nullFormat)); + ListSinkDesc desc = new ListSinkDesc(nullFormat); + ListSinkOperator sink = (ListSinkOperator) OperatorFactory.get(desc); sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>()); Operator<? extends OperatorDesc> parent = fileSink.getParentOperators().get(0); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index b15a46d..13f0a38 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -259,8 +259,15 @@ public boolean isTopLevelSelectStarQuery() { // to find target for fetch task conversion optimizer (not allows subqueries) public boolean isSimpleSelectQuery() { - return qbp.isSimpleSelectQuery() && aliasToSubq.isEmpty() && !isCTAS() && - !qbp.isAnalyzeCommand(); + if (!qbp.isSimpleSelectQuery() || isCTAS() || qbp.isAnalyzeCommand()) { + return false; + } + for (QBExpr qbexpr : aliasToSubq.values()) { + if (!qbexpr.isSimpleSelectQuery()) { + return false; + } + } + return true; } public boolean hasTableSample(String alias) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java index e923bca..36e65da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBExpr.java @@ -114,4 +114,10 @@ public void print(String msg) { } } + public boolean isSimpleSelectQuery() { + if (qb != null) { + return qb.isSimpleSelectQuery(); + } + return qbexpr1.isSimpleSelectQuery() && qbexpr2.isSimpleSelectQuery(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 02c4be9..3e51188 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -466,10 +466,12 @@ public boolean isTopLevelSimpleSelectStarQuery() { return true; } + // quick check for the possible existence of an RS (checked again in SimpleFetchOptimizer) public boolean isSimpleSelectQuery() { - if (isSubQ || joinExpr != null || !destToOrderby.isEmpty() || !destToSortby.isEmpty() + if (joinExpr != null || !destToOrderby.isEmpty() || !destToSortby.isEmpty() || !destToGroupby.isEmpty() || !destToClusterby.isEmpty() || !destToDistributeby.isEmpty() - || !aliasToLateralViews.isEmpty() || !destToLateralView.isEmpty()) { + || !destRollups.isEmpty() || !destCubes.isEmpty() || !destGroupingSets.isEmpty() + || !destToHaving.isEmpty()) { return false; } @@ -491,6 +493,7 @@ public boolean isSimpleSelectQuery() { } } + // exclude insert queries for (ASTNode v : nameToDest.values()) { if (!(v.getChild(0).getType() == HiveParser.TOK_TMP_FILE)) { return false; } diff --git 
ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 32d84ea..0d56828 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -99,8 +100,8 @@ public FetchWork(List<Path> partDir, List<PartitionDesc> partDesc, public void initializeForFetch() { if (source == null) { - sink = new ListSinkOperator(); - sink.setConf(new ListSinkDesc(serializationNullFormat)); + ListSinkDesc desc = new ListSinkDesc(serializationNullFormat); + sink = (ListSinkOperator) OperatorFactory.get(desc); source = sink; } } diff --git ql/src/test/queries/clientpositive/nonmr_fetch.q ql/src/test/queries/clientpositive/nonmr_fetch.q index 2a92d17..2a52888 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch.q +++ ql/src/test/queries/clientpositive/nonmr_fetch.q @@ -9,7 +9,6 @@ select * from src limit 10; explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; --- negative, select expression explain select key from src limit 10; select key from src limit 10; @@ -62,6 +61,16 @@ select * from src TABLESAMPLE (0.25 PERCENT); explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT); select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT); +-- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20; +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20; + +-- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20; +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20; + -- non deterministic func explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1; select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1; @@ -78,8 +87,5 @@ explain create table srcx as select distinct key, value from src; -- negative, analyze explain analyze table src compute statistics; --- negative, subq -explain select a.* from (select * from src) a; - -- negative, join explain select * from src join src src2 on src.key=src2.key; diff --git ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q index b1a7cb5..959212b 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q +++ ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q @@ -3,6 +3,11 @@ set hive.fetch.task.conversion=more; explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select cast(key as int) * 10, upper(value) from src limit 10; +set hive.fetch.task.conversion.threshold=10000; + +explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; +explain select cast(key as int) * 10, upper(value) from src limit 10; + set hive.fetch.task.conversion.threshold=100; -- from HIVE-7397, limit + partition pruning filter diff --git 
ql/src/test/results/clientpositive/lateral_view_noalias.q.out ql/src/test/results/clientpositive/lateral_view_noalias.q.out index e1445bf..aec8752 100644 --- ql/src/test/results/clientpositive/lateral_view_noalias.q.out +++ ql/src/test/results/clientpositive/lateral_view_noalias.q.out @@ -5,20 +5,38 @@ POSTHOOK: query: --HIVE-2608 Do not require AS a,b,c part in LATERAL VIEW EXPLAIN SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Lateral View Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Lateral View Join Operator + outputColumnNames: _col5, _col6 + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + Select Operator + expressions: map('key1':100,'key2':200) (type: map<string,int>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE @@ -29,43 +47,7 @@ STAGE PLANS: Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: map('key1':100,'key2':200) (type: map<string,int>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Lateral View Join Operator - outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: string), _col6 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - 
Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 2 - Processor Tree: - ListSink + ListSink PREHOOK: query: SELECT myTab.* from src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) myTab limit 2 PREHOOK: type: QUERY @@ -82,39 +64,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: map('key1':100,'key2':200) (type: map<string,int>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: map('key1':100,'key2':200) (type: map<string,int>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT explode(map('key1', 100, 'key2', 200)) from src limit 2 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index c6a4318..beb30a4 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -86,11 +86,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 278 val_278 2008-04-08 11 98 val_98 2008-04-08 11 484 val_484 2008-04-08 11 -PREHOOK: query: -- negative, select expression -explain select key from src limit 10 +PREHOOK: query: explain select key from src limit 10 PREHOOK: type: QUERY -POSTHOOK: query: -- negative, select expression -explain select key from src limit 10 +POSTHOOK: query: explain select key from src limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage @@ -787,6 +785,150 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 86 val_86 2008-04-09 11 12 238 val_238 2008-04-09 12 0 86 val_86 2008-04-09 12 12 +PREHOOK: query: -- sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- 
sub query +explain +select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 200) and (key < 250)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value from (select value key,key value from src where key > 200) a where value < 250 limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_238 238 +val_224 224 +val_213 213 +val_209 209 +val_219 219 +val_237 237 +val_207 207 +val_208 208 +val_247 247 +val_203 203 +val_205 205 +val_221 221 +val_208 208 +val_239 239 +val_213 213 +val_216 216 +val_221 221 +val_241 241 +val_230 230 +val_217 217 +PREHOOK: query: -- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- lateral view +explain +select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + Select Operator + expressions: array(key,value) (type: array<string>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + 
Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +238 238 +238 val_238 +86 86 +86 val_86 +311 311 +311 val_311 +27 27 +27 val_27 +165 165 +165 val_165 +409 409 +409 val_409 +255 255 +255 val_255 +278 278 +278 val_278 +98 98 +98 val_98 +484 484 +484 val_484 PREHOOK: query: -- non deterministic func explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 PREHOOK: type: QUERY @@ -1029,29 +1171,6 @@ STAGE PLANS: Stage: Stage-1 Stats-Aggr Operator -PREHOOK: query: -- negative, subq -explain select a.* from (select * from src) a -PREHOOK: type: QUERY -POSTHOOK: query: -- negative, subq -explain select a.* from (select * from src) a -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - ListSink - PREHOOK: query: -- negative, join explain select * from src join src src2 on src.key=src2.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index cb0d332..17e3c54 100644 --- ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -46,6 +46,54 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE ListSink +PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data 
size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: query: -- from HIVE-7397, limit + partition pruning filter explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/select_dummy_source.q.out ql/src/test/results/clientpositive/select_dummy_source.q.out index 08311f0..b1f0939 100644 --- ql/src/test/results/clientpositive/select_dummy_source.q.out +++ ql/src/test/results/clientpositive/select_dummy_source.q.out @@ -190,37 +190,25 @@ POSTHOOK: query: explain select explode(array('a', 'b')) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: _dummy_table - Row Limit Per Split: 1 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: array('a','b') (type: array<string>) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: array('a','b') (type: array<string>) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: select explode(array('a', 'b')) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_explode.q.out ql/src/test/results/clientpositive/udf_explode.q.out index 301b1b7..b5fd7a2 100644 --- ql/src/test/results/clientpositive/udf_explode.q.out +++ ql/src/test/results/clientpositive/udf_explode.q.out @@ -39,101 +39,26 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(1,2,3) (type: array<int>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num 
rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns col - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(1,2,3) (type: array<int>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src tablesample (1 rows)) a GROUP BY a.myCol PREHOOK: type: QUERY @@ -383,101 +308,26 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: map(1:'one',2:'two',3:'three') (type: map<int,string>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - File Output Operator - 
compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns key,value - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: map(1:'one',2:'two',3:'three') (type: map<int,string>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.key, a.val, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) AS (key,val) FROM src tablesample (1 rows)) a GROUP BY a.key, a.val PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_inline.q.out ql/src/test/results/clientpositive/udf_inline.q.out index a9cde60..45bd463 100644 --- ql/src/test/results/clientpositive/udf_inline.q.out +++ ql/src/test/results/clientpositive/udf_inline.q.out @@ -20,39 +20,27 @@ POSTHOOK: query: explain SELECT inline( ) as (id, text) FROM SRC limit 2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: 
- TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE - function name: inline - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 2 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: SELECT inline( ARRAY( diff --git ql/src/test/results/clientpositive/udtf_explode.q.out ql/src/test/results/clientpositive/udtf_explode.q.out index 6213746..e1fcdee 100644 --- ql/src/test/results/clientpositive/udtf_explode.q.out +++ ql/src/test/results/clientpositive/udtf_explode.q.out @@ -38,103 +38,28 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: array(1,2,3) (type: array<int>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns col - columns.types int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Stage: Stage-0 Fetch Operator limit: 3 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: array(1,2,3) (type: array<int>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myCol, count(1) FROM (SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3) a GROUP BY a.myCol PREHOOK: type: QUERY @@ -419,39 +344,27 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: map(1:'one',2:'two',3:'three') (type: map<int,string>) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - UDTF Operator - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - function name: explode - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 3 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: map(1:'one',2:'two',3:'three') (type: map<int,string>) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 1557 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT a.myKey, a.myVal, count(1) FROM (SELECT explode(map(1,'one',2,'two',3,'three')) as (myKey,myVal) FROM src LIMIT 3) a GROUP BY a.myKey, a.myVal PREHOOK: type: QUERY
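
A note on the alias decomposition in SimpleFetchOptimizer.checkTree: aliases arriving at the optimizer have the shape "(subquery-alias:)*table-name", where union branches append "-subquery1"/"-subquery2" to the subquery-alias and a top-level union appears as "null". The sketch below replays that walk over plain strings so it can be read outside the diff; the class name and the printed trace are invented for illustration, and only the id format and control flow come from the patch.

```java
import java.util.Arrays;

// Hypothetical, self-contained replay of the id walk in checkTree();
// the QB/QBExpr navigation is replaced by trace output.
public class AliasWalk {
  public static void main(String[] args) {
    // e.g. the scan of "src" inside subquery "a", nested in the first
    // branch of a top-level union
    String alias = "null-subquery1:a:src";
    String[] subqIDs = alias.split(":");
    for (int i = 0; i < subqIDs.length - 1; i++) {
      // union branches append "-subquery1" / "-subquery2" to the alias
      String[] subqID = subqIDs[i].split("-");
      if (subqID[0].equals("null")) {
        // root alias: the patch skips straight to the next segment
        System.out.println("root alias, skip: " + Arrays.toString(subqID));
        continue;
      }
      System.out.println("descend into subquery: " + subqID[0]);
      for (int j = 1; j < subqID.length; j++) {
        // in the real code this picks getQBExpr1()/getQBExpr2()
        System.out.println("  union branch: " + subqID[j]);
      }
    }
    // the last segment is the table the TableScan actually reads
    System.out.println("table: " + subqIDs[subqIDs.length - 1]);
  }
}
```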
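The aggressive-mode check added as isConvertible() walks the operator DAG from the TableScan, rejects ReduceSink/CommonJoin/Script, and descends into a child only once every parent of that child has been visited, so multi-parent nodes (such as the Lateral View Join in the plans above) are judged exactly once by the last arriving parent. A minimal sketch of that traversal pattern follows; Node is a hypothetical stand-in for Hive's Operator, and the "FS" leaf check stands in for the FileSinkOperator test in the patch.

```java
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical operator node: a name, a blacklist flag, and DAG edges.
class Node {
  final String name;
  final boolean blacklisted; // e.g. ReduceSink/CommonJoin/Script in the patch
  final List<Node> parents = new ArrayList<>();
  final List<Node> children = new ArrayList<>();
  Node(String name, boolean blacklisted) { this.name = name; this.blacklisted = blacklisted; }
  void addChild(Node c) { children.add(c); c.parents.add(this); }
}

public class ConvertibleCheck {
  static boolean isConvertible(Node node, Set<Node> traversed) {
    if (node.blacklisted) {
      return false;
    }
    if (!traversed.add(node)) {
      return true; // already judged via another parent
    }
    if (node.children.isEmpty()) {
      return node.name.equals("FS"); // the leaf must be the file sink
    }
    for (Node child : node.children) {
      if (!traversed.containsAll(child.parents)) {
        continue; // wait until the last parent reaches this child
      }
      if (!isConvertible(child, traversed)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    // lateral view shape: TS -> LVF -> {SEL, SEL -> UDTF} -> LVJ -> FS
    Node ts = new Node("TS", false), lvf = new Node("LVF", false);
    Node sel1 = new Node("SEL", false), sel2 = new Node("SEL", false);
    Node udtf = new Node("UDTF", false), lvj = new Node("LVJ", false);
    Node fs = new Node("FS", false);
    ts.addChild(lvf); lvf.addChild(sel1); lvf.addChild(sel2);
    sel2.addChild(udtf); sel1.addChild(lvj); udtf.addChild(lvj); lvj.addChild(fs);
    System.out.println(isConvertible(ts, new HashSet<>())); // prints true
  }
}
```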
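Both call sites that previously built the sink by hand (SimpleFetchOptimizer.replaceFSwithLS and FetchWork.initializeForFetch) now go through OperatorFactory.get(desc), which resolves the operator class from the descriptor class registered in the OperatorFactory hunk. A toy version of that registry pattern is sketched below under stated assumptions: DescRegistry and its nested types are invented, and the real factory wires the descriptor into the operator generically rather than with a cast.

```java
import java.util.HashMap;
import java.util.Map;

// Invented, minimal version of the desc-class -> operator-class registry
// that the OperatorFactory change extends with ListSinkDesc.
public class DescRegistry {
  interface Desc {}
  static class ListSinkDesc implements Desc {
    final String nullFormat;
    ListSinkDesc(String nullFormat) { this.nullFormat = nullFormat; }
  }
  static class ListSinkOperator {
    Desc conf; // set by the factory, mirroring setConf() in Hive
  }

  private static final Map<Class<? extends Desc>, Class<?>> OPS = new HashMap<>();
  static {
    OPS.put(ListSinkDesc.class, ListSinkOperator.class); // mirrors the added registration
  }

  static Object get(Desc desc) throws ReflectiveOperationException {
    Class<?> opClass = OPS.get(desc.getClass());
    if (opClass == null) {
      throw new IllegalArgumentException("No operator registered for " + desc.getClass());
    }
    Object op = opClass.getDeclaredConstructor().newInstance();
    ((ListSinkOperator) op).conf = desc; // toy wiring; the real factory does this generically
    return op;
  }

  public static void main(String[] args) throws Exception {
    Object sink = get(new ListSinkDesc("\\N"));
    System.out.println(sink.getClass().getSimpleName()); // ListSinkOperator
  }
}
```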